library(data.table)
library(dplyr)
library(DT)
library(ggplot2)
library(gtools)
library(vegan)
library(iNEXT)
library(fossil)
library(ggrepel)
Let’s read the dataset and remove the samples containing less than 1154 reads:
setwd("~/Documents/TFM2/TFM2_Taraspina_miTags_analysis/")
#read data
tb18_tax <- read.table(file="~/Documents/TFM2/TFM2_Taraspina_miTags_analysis/input/otu_table99_SUR_miTags_18S_classif.txt", head=TRUE, fill=TRUE)
#table dimensions and format before setting column names
dim(tb18_tax) # 8642 58
tb18_tax[1:5,1:5]
row.names(tb18_tax)<-tb18_tax[,1]
tb18_tax<-tb18_tax[,-1]
tb18_tax[is.na(tb18_tax)]<-"NA"
dim(tb18_tax) #8642 57
tb18_tax[1:5,1:5]
#table with taxonomic classification alone:
tb18_class <- tb18_tax[,56:57]
dim(tb18_class) #8642 2
tb18_class[1:5,1:2]
#table with occurence data alone
tb18_tax_occur <- tb18_tax[,1:55]
dim(tb18_tax_occur) #8642 55
tb18_tax_occur[1:5,1:5]
amplicons_per_sample_tb18<-colSums(tb18_tax_occur)
summary(amplicons_per_sample_tb18) #median of amplicons per sample = ~3086)
#we'll remove all samples with less than 1154 reads.
amplicons_per_sample_tb18[which(colSums(tb18_tax_occur)<1154)]
#remove samples with less than 1154 reads
tb18_tax_occur_min1154 <- tb18_tax_occur[,colSums(tb18_tax_occur) >= 1154]
dim(tb18_tax_occur_min1154) #8642 45
#remove samples with omitted in 16S dataset (so that we can compare the relative abundance of 16S and 18S OTUs considering the same samples)
tb18_tax_occur_min1154<-subset(tb18_tax_occur_min1154, select=-c(TARA_70_SUR_0d2_3,TARA_67_SUR_0d2_3))
dim(tb18_tax_occur_min1154) #8642 43
#let's check if we loose any OTU when excluding the mentioned stations.
tb18_tax_occur_min1154_no_cero<-tb18_tax_occur_min1154[-(which(rowSums(tb18_tax_occur_min1154)==0)),]
dim(tb18_tax_occur_min1154_no_cero)
Table dimensions and content outline:
## [1] 8642 43
## TARA_102_SUR_0d2_3 TARA_109_SUR_0d2_3 TARA_110_SUR_0d2_3
## OTU_1 0 0 0
## OTU_2 0 0 0
## OTU_3 0 0 0
## OTU_4 0 0 0
## OTU_5 0 0 0
## TARA_111_SUR_0d2_3 TARA_112_SUR_0d2_3
## OTU_1 0 0
## OTU_2 0 0
## OTU_3 1 0
## OTU_4 0 0
## OTU_5 0 0
Minimum number of reads per station:
min(colSums(tb18_tax_occur_min1154))
## [1] 1154
Maximum number of reads per station:
max(colSums(tb18_tax_occur_min1154))
## [1] 23781
Identification of stations with higher number of reads:
amplicons_per_sample<-colSums(tb18_tax_occur_min1154)
amplicons_per_sample[which(colSums(tb18_tax_occur_min1154)>20000)]
## TARA_84_SUR_0d2_3
## 23781
Overall reads per sample:
Let’s normalize the original dataset by randomly subsampling 1154 reads in each station:
tb18_tax_occur_min1154_t<-t(tb18_tax_occur_min1154)
tb18_tax_occur_ss1154<-rrarefy(tb18_tax_occur_min1154_t, 1154)
The normalized table shows the following dimensions and format:
## [1] 43 8642
## OTU_1 OTU_2 OTU_3 OTU_4 OTU_5
## TARA_102_SUR_0d2_3 0 0 0 0 0
## TARA_109_SUR_0d2_3 0 0 0 0 0
## TARA_110_SUR_0d2_3 0 0 0 0 0
## TARA_111_SUR_0d2_3 0 0 1 0 0
## TARA_112_SUR_0d2_3 0 0 0 0 0
Its content fits with the expected normalization values (1154 reads per station):
rowSums(tb18_tax_occur_ss1154)
## TARA_102_SUR_0d2_3 TARA_109_SUR_0d2_3 TARA_110_SUR_0d2_3
## 1154 1154 1154
## TARA_111_SUR_0d2_3 TARA_112_SUR_0d2_3 TARA_122_SUR_0d2_3
## 1154 1154 1154
## TARA_123_SUR_0d2_3 TARA_124_SUR_0d2_3 TARA_125_SUR_0d2_3
## 1154 1154 1154
## TARA_128_SUR_0d2_3 TARA_132_SUR_0d2_3 TARA_133_SUR_0d2_3
## 1154 1154 1154
## TARA_137_SUR_0d2_3 TARA_138_SUR_0d2_3 TARA_140_SUR_0d2_3
## 1154 1154 1154
## TARA_142_SUR_0d2_3 TARA_145_SUR_0d2_3 TARA_146_SUR_0d2_3
## 1154 1154 1154
## TARA_148_SUR_0d2_3 TARA_149_SUR_0d2_3 TARA_150_SUR_0d2_3
## 1154 1154 1154
## TARA_151_SUR_0d2_3 TARA_152_SUR_0d2_3 TARA_56_SUR_0d2_3
## 1154 1154 1154
## TARA_57_SUR_0d2_3 TARA_62_SUR_0d2_3 TARA_64_SUR_0d2_3
## 1154 1154 1154
## TARA_65_SUR_0d2_3 TARA_66_SUR_0d2_3 TARA_68_SUR_0d2_3
## 1154 1154 1154
## TARA_72_SUR_0d2_3 TARA_76_SUR_0d2_3 TARA_78_SUR_0d2_3
## 1154 1154 1154
## TARA_82_SUR_0d2_3 TARA_84_SUR_0d2_3 TARA_85_SUR_0d2_3
## 1154 1154 1154
## TARA_96_SUR_0d2_3 TARA_98_SUR_0d2_3 TARA_99_SUR_0d2_3
## 1154 1154 1154
## MP0311 MP1517 MP1672
## 1154 1154 1154
## MP2821
## 1154
Let’s check out how many OTUs don’t appear in the new table:
length(which(colSums(tb18_tax_occur_ss1154)==0))
## [1] 2942
There are 2942 OTUs that don’t show any occurrence in the normalized data. Let’s remove them from the table and take a look at its final dimensions:
tb18_tax_occur_ss1154_no_cero<-tb18_tax_occur_ss1154[,-(which(colSums(tb18_tax_occur_ss1154)==0))]
tb18_tax_occur_ss1154_no_cero<-tb18_tax_occur_ss1154_no_cero[mixedorder(row.names(tb18_tax_occur_ss1154_no_cero)),]
dim(tb18_tax_occur_ss1154_no_cero)
## [1] 43 5700
Datasets summary:
dim(tb18_tax)
## [1] 8642 57
dim(tb18_tax_occur)
## [1] 8642 55
dim(tb18_tax_occur_ss1154_no_cero)
## [1] 43 5700
Most of the samples take Shannon Index values around 6:
Lowest number of OTUs per sample:
## [1] 348
Maximum number of OTUs per sample:
## [1] 791
In most of the samples, we can identify between 600 and 700 OTUs:
plot(sort(OTUs_per_sample_18S_tax_occur_ss1154), pch=19)
boxplot(OTUs_per_sample_18S_tax_occur_ss1154, pch=19)
pielou_evenness_18S_tax_occur_ss1154<-tb18_tax_occur_ss1154_div/log(OTUs_per_sample_18S_tax_occur_ss1154)
The Pielou index (constrained between 0 and 1) takes values closer to 1 as the variation of species proportion in a sample increases. Most of the samples get values around 0.95, meaning that the numerical composition of different OTUs in a sample is highly variable - there’s no constant presence of dominant species.
The less variation in communities between the species (and the presence of a dominant specie), the lower J’ is.
plot(sort(pielou_evenness_18S_tax_occur_ss1154), pch=19)
boxplot(pielou_evenness_18S_tax_occur_ss1154, pch=19)
The OTU_38, with 874 reads, is the most abundant in the overall dataset:
head(sort(colSums(tb18_tax_occur_ss1154_no_cero), decreasing=T), n=10L)
## OTU_38 OTU_3677 OTU_5415 OTU_751 OTU_3374 OTU_2494 OTU_2631 OTU_3569
## 874 695 571 524 429 361 310 282
## OTU_3177 OTU_1032
## 270 267
Most of the OTUs show very few occurrences; suggesting that we will probably be able to identify a significant ammount of rare otus:
plot(log(sort(colSums(tb18_tax_occur_ss1154_no_cero), decreasing=T)), pch=19)
The OTUs abundance distribution fits relativelly close to log-normal model.
According to Preston’s lognormal model fit into species frequencies groups, we’re missing 1588.816 species:
tb18_tax_occur_ss1154_prestonfit<-prestonfit(colSums(tb18_tax_occur_min1154_t))
plot(tb18_tax_occur_ss1154_prestonfit, main="Pooled species")
veiledspec(tb18_tax_occur_ss1154_prestonfit)
## Extrapolated Observed Veiled
## 9937.816 8349.000 1588.816
When computing Prestons’ lognormal model fit without pooling data into groups, we seem to miss 1385.371 species:
## Extrapolated Observed Veiled
## 9734.371 8349.000 1385.371
rarec_input<-t(as.matrix(colSums(tb18_tax_occur_ss1154_no_cero)))
#tb18_rarecurve_step1000_10000<-rarecurve(rarec_input, step = 1000, 10000, xlab = "Sample size", ylab = "OTUs", label = TRUE, main="18S amplicons diversity step=1000 & ss=10000\n(40,816 OTUs; 5,247,375 reads)\n")
#tb18_rarecurve_step1000_10000
tb18_rarecurve_step100_5700<-rarecurve(rarec_input, step = 100, 5700, xlab = "Sample size", ylab = "OTUs", label = TRUE, main="18S amplicons diversity step=100 & ss=5700\n(5,719 OTUs; 49,622 reads)\n")
#without rarefying
rarec_allOTUs_input<-t(as.matrix(colSums(t(tb18_tax_occur))))
tb18_rarecurve_allOTUs_step100_8642<-rarecurve(rarec_allOTUs_input, step = 100, 8642, xlab = "Sample size", ylab = "OTUs", label = TRUE, main="18S amplicons diversity non-rarefied step=100 & ss=8642\n(8,642 OTUs; 244,184 reads)\n")
The Bray-Curtis dissimilarity, constrained between 0 (minimum distance) and 1 (highest dissimilarity) allows us to quantify the differences between samples according to the composition and relative abundance of their OTUs. In our dataset, most of the samples pairs take dissimilarity values around 0.8, meaning that their composition is substantially different.
The stations seem to form clusters according to geographic localization, but there are no evident clusters separated from the general groups, apart from the one containing TARA_84, TARA_82 and TARA_85.
(To be done: assign Longhurst provinces information to each station and check if any of the central clusters is meaningful regarding to the samples’ geographical location)
We can identify a prominent group in the central part of the NMDS plot and a few outliers (TARA 82, 84 and 85) in the central-right edge of the plot. The stress parameter takes a value below 0.2, suggesting that the plot is acceptable.
##
## Call:
## monoMDS(dist = tb18_tax_occur_ss1154_no_cero.bray)
##
## Non-metric Multidimensional Scaling
##
## 43 points, dissimilarity 'bray', call 'vegdist(x = tb18_tax_occur_ss1154_no_cero, method = "bray")'
##
## Dimensions: 2
## Stress: 0.1463937
## Stress type 1, weak ties
## Scores scaled to unit root mean square, rotated to principal components
## Stopped after 70 iterations: Stress nearly unchanged (ratio > sratmax)
When implementing a most robut function for computing NMDS plots, the result is quiet the same:
## Run 0 stress 0.1465317
## Run 1 stress 0.1710814
## Run 2 stress 0.1529505
## Run 3 stress 0.1529514
## Run 4 stress 0.1463796
## ... New best solution
## ... Procrustes: rmse 0.0160749 max resid 0.09410719
## Run 5 stress 0.1450501
## ... New best solution
## ... Procrustes: rmse 0.03346756 max resid 0.2057251
## Run 6 stress 0.1544138
## Run 7 stress 0.1529513
## Run 8 stress 0.1464534
## Run 9 stress 0.1645093
## Run 10 stress 0.1464532
## Run 11 stress 0.1529505
## Run 12 stress 0.145052
## ... Procrustes: rmse 0.0005393131 max resid 0.002027162
## ... Similar to previous best
## Run 13 stress 0.1450487
## ... New best solution
## ... Procrustes: rmse 0.002278156 max resid 0.01146875
## Run 14 stress 0.154718
## Run 15 stress 0.154417
## Run 16 stress 0.1450194
## ... New best solution
## ... Procrustes: rmse 0.003631269 max resid 0.01466868
## Run 17 stress 0.1546418
## Run 18 stress 0.1522817
## Run 19 stress 0.1522815
## Run 20 stress 0.1544133
## *** No convergence -- monoMDS stopping criteria:
## 20: stress ratio > sratmax
## Warning in ordiplot(x, choices = choices, type = type, display = display, :
## Species scores not available
## Warning in if (class(lats) == "SpatialPoints") lats <- coordinates(lats):
## the condition has length > 1 and only the first element will be used
Working datasets:
dim(tb18_tax_occur_ss1154_no_cero)
## [1] 43 5728
tb18_tax_occur_ss1154_no_cero[1:5, 1:5]
## OTU_1 OTU_2 OTU_3 OTU_5 OTU_6
## MP0311 0 0 0 0 0
## MP1517 0 0 0 0 1
## MP1672 0 0 0 0 0
## MP2821 0 3 0 0 0
## TARA_102_SUR_0d2_3 0 0 0 0 0
tb18_tax_occur_ss1154_no_cero.bray<-as.matrix(tb18_tax_occur_ss1154_no_cero.bray)
## [1] 43 43
dim(geo_distances_MP_18S)
## [1] 43 43
Communities quickly change their composition across geographical distances:
plot(geo_distances_MP_18S, tb18_tax_occur_ss1154_no_cero.bray, pch=19, cex=0.4, xlab="Geopgraphical distances", ylab="Bray-Curtis dissimilarities")
Mantel statistic is -significantlly- so low, meaning that the correlation between samples dissimilarity and geographical distances is weak.
mantel(geo_distances_MP_18S, tb18_tax_occur_ss1154_no_cero.bray)
##
## Mantel statistic based on Pearson's product-moment correlation
##
## Call:
## mantel(xdis = geo_distances_MP_18S, ydis = tb18_tax_occur_ss1154_no_cero.bray)
##
## Mantel statistic r: 0.1186
## Significance: 0.018
##
## Upper quantiles of permutations (null model):
## 90% 95% 97.5% 99%
## 0.0646 0.0894 0.1075 0.1334
## Permutation: free
## Number of permutations: 999
Maximum distance between samples:
## [1] 19543.94
Minimum distance between samples:
## [1] 0
Correlograms:
MP_18s_ss1154_mantel_correl_by_1000km<-mantel.correlog(tb18_tax_occur_ss1154_no_cero.bray, D.geo=geo_distances_MP_18S, break.pts=seq(0,20000, by=1000))
plot(MP_18s_ss1154_mantel_correl_by_1000km)
MP_18s_ss1154_mantel_correl_by_100km<-mantel.correlog(tb18_tax_occur_ss1154_no_cero.bray, D.geo=geo_distances_MP_18S, break.pts=seq(0,20000, by=100))
plot(MP_18s_ss1154_mantel_correl_by_100km)
In the following plot, we can appreciate the OTUs distribution according to their percentage of occurence and relative abundance. The red line keeps up OTUs that occur in more than 80% of the samples, the green line limits regionally rare OTUs (< 0.001%), and the blue one restricts regionally abundant OTUs (> 0.1%).
Regionally abundant OTUs (relative abundance over 0.1%):
tb18_class_prov<-cbind(otu_names=row.names(tb18_class),tb18_class)
## otu_names mean_rabund perc_occur classif
## 1 OTU_1001 0.001188989 60.465116 <NA>
## 2 OTU_1003 0.001027770 62.790698 <NA>
## 3 OTU_1012 0.001007618 60.465116 <NA>
## 4 OTU_1018 0.001874169 13.953488 Prymnesiophyceae
## 5 OTU_1032 0.005300069 72.093023 Prymnesiophyceae
## 6 OTU_1047 0.001813712 67.441860 <NA>
## 7 OTU_1066 0.001330055 58.139535 Prymnesiophyceae
## 8 OTU_1087 0.001289751 55.813953 <NA>
## 9 OTU_1261 0.001128532 58.139535 <NA>
## 10 OTU_1276 0.001007618 48.837209 <NA>
## 11 OTU_1343 0.001712950 69.767442 Dinophyceae
## 12 OTU_1383 0.003627423 88.372093 <NA>
## 13 OTU_1395 0.001330055 25.581395 <NA>
## 14 OTU_1446 0.001289751 69.767442 <NA>
## 15 OTU_1456 0.002901939 72.093023 Dinophyceae
## 16 OTU_1496 0.001551731 69.767442 <NA>
## 17 OTU_1551 0.001350208 65.116279 <NA>
## 18 OTU_1597 0.003385595 69.767442 Prymnesiophyceae
## 19 OTU_1628 0.001229293 58.139535 Dinophyceae
## 20 OTU_1635 0.001450969 69.767442 <NA>
## 21 OTU_1641 0.001612188 74.418605 Dinophyceae
## 22 OTU_1694 0.001289751 55.813953 <NA>
## 23 OTU_1746 0.001793559 53.488372 Prymnesiophyceae
## 24 OTU_1825 0.001108379 55.813953 <NA>
## 25 OTU_1833 0.002720567 72.093023 <NA>
## 26 OTU_1901 0.001047922 48.837209 <NA>
## 27 OTU_1911 0.001068075 48.837209 <NA>
## 28 OTU_1921 0.003687880 76.744186 Prymnesiophyceae
## 29 OTU_1929 0.001330055 48.837209 Mamiellophyceae
## 30 OTU_1975 0.001027770 58.139535 <NA>
## 31 OTU_1997 0.001471122 74.418605 Prymnesiophyceae
## 32 OTU_2031 0.002377978 67.441860 <NA>
## 33 OTU_2043 0.001632340 60.465116 <NA>
## 34 OTU_2078 0.001209141 67.441860 <NA>
## 35 OTU_2086 0.001068075 65.116279 Dinophyceae
## 36 OTU_2089 0.001108379 46.511628 <NA>
## 37 OTU_2120 0.001390512 58.139535 <NA>
## 38 OTU_2124 0.001229293 72.093023 Dictyochophyceae
## 39 OTU_2158 0.001168836 67.441860 Prymnesiophyceae
## 40 OTU_216 0.001531579 67.441860 Dinophyceae
## 41 OTU_2189 0.001330055 58.139535 <NA>
## 42 OTU_2195 0.001733102 72.093023 Dinophyceae
## 43 OTU_2200 0.003728185 86.046512 <NA>
## 44 OTU_2207 0.001450969 72.093023 Dinophyceae
## 45 OTU_2219 0.001027770 67.441860 Prymnesiophyceae
## 46 OTU_2269 0.001753255 86.046512 <NA>
## 47 OTU_2305 0.001612188 74.418605 <NA>
## 48 OTU_2456 0.001128532 58.139535 <NA>
## 49 OTU_2466 0.001007618 46.511628 <NA>
## 50 OTU_2494 0.006932409 69.767442 Prymnesiophyceae
## 51 OTU_2518 0.001551731 53.488372 <NA>
## 52 OTU_2553 0.001974930 83.720930 <NA>
## 53 OTU_256 0.001773407 76.744186 <NA>
## 54 OTU_2594 0.001309903 69.767442 <NA>
## 55 OTU_2631 0.006106163 74.418605 Prymnesiophyceae
## 56 OTU_2648 0.001712950 83.720930 <NA>
## 57 OTU_2654 0.002115997 65.116279 Pelagophyceae
## 58 OTU_2686 0.001632340 72.093023 <NA>
## 59 OTU_2730 0.001289751 53.488372 <NA>
## 60 OTU_2853 0.001350208 69.767442 Prymnesiophyceae
## 61 OTU_2868 0.001712950 48.837209 <NA>
## 62 OTU_2873 0.001148684 72.093023 <NA>
## 63 OTU_2887 0.001350208 69.767442 <NA>
## 64 OTU_2906 0.001027770 48.837209 <NA>
## 65 OTU_2910 0.004070775 72.093023 <NA>
## 66 OTU_295 0.001027770 58.139535 <NA>
## 67 OTU_2965 0.001733102 67.441860 <NA>
## 68 OTU_2966 0.001128532 58.139535 <NA>
## 69 OTU_2986 0.001249446 60.465116 <NA>
## 70 OTU_2996 0.002055540 69.767442 <NA>
## 71 OTU_3018 0.001612188 72.093023 <NA>
## 72 OTU_3038 0.002539196 76.744186 <NA>
## 73 OTU_3056 0.001934626 72.093023 <NA>
## 74 OTU_3124 0.001450969 58.139535 <NA>
## 75 OTU_3177 0.005682963 48.837209 Mamiellophyceae
## 76 OTU_3196 0.001168836 58.139535 Prymnesiophyceae
## 77 OTU_3227 0.003466205 34.883721 Prasinophyceae_clade-VII
## 78 OTU_3271 0.002357825 32.558140 Prasinophyceae_clade-VII
## 79 OTU_3282 0.004211842 37.209302 Mamiellophyceae
## 80 OTU_3374 0.008826730 93.023256 Pelagophyceae
## 81 OTU_3430 0.001390512 34.883721 Mamiellophyceae
## 82 OTU_3454 0.003607271 72.093023 <NA>
## 83 OTU_3560 0.001974930 62.790698 Pelagophyceae
## 84 OTU_3569 0.005159002 53.488372 Mamiellophyceae
## 85 OTU_3602 0.001410665 65.116279 <NA>
## 86 OTU_363 0.001007618 44.186047 <NA>
## 87 OTU_3677 0.013884970 81.395349 Prymnesiophyceae
## 88 OTU_3729 0.003909556 79.069767 Pelagophyceae
## 89 OTU_373 0.001007618 51.162791 <NA>
## 90 OTU_38 0.017492241 88.372093 Pelagophyceae
## 91 OTU_380 0.002156302 83.720930 Prymnesiophyceae
## 92 OTU_3813 0.001007618 39.534884 <NA>
## 93 OTU_3871 0.002156302 69.767442 <NA>
## 94 OTU_3926 0.002015235 69.767442 Pelagophyceae
## 95 OTU_4089 0.001430817 37.209302 Mamiellophyceae
## 96 OTU_4090 0.001551731 30.232558 Mamiellophyceae
## 97 OTU_4103 0.002418282 37.209302 Mamiellophyceae
## 98 OTU_4110 0.001027770 41.860465 Mamiellophyceae
## 99 OTU_4123 0.002519044 34.883721 Prasinophyceae_clade-VII
## 100 OTU_4125 0.002599653 79.069767 Pelagophyceae
## 101 OTU_4134 0.001733102 48.837209 Pelagophyceae
## 102 OTU_4144 0.003244529 44.186047 Mamiellophyceae
## 103 OTU_4176 0.003385595 34.883721 Mamiellophyceae
## 104 OTU_4190 0.001249446 30.232558 Mamiellophyceae
## 105 OTU_4203 0.001350208 53.488372 Pelagophyceae
## 106 OTU_4226 0.001672645 37.209302 Mamiellophyceae
## 107 OTU_4227 0.003204224 32.558140 Prasinophyceae_clade-VII
## 108 OTU_4260 0.001793559 41.860465 Mamiellophyceae
## 109 OTU_437 0.001450969 76.744186 Prymnesiophyceae
## 110 OTU_4518 0.002377978 44.186047 Mamiellophyceae
## 111 OTU_4533 0.001551731 39.534884 Mamiellophyceae
## 112 OTU_458 0.001108379 60.465116 Prymnesiophyceae
## 113 OTU_5120 0.004070775 39.534884 Mamiellophyceae
## 114 OTU_5415 0.010761356 25.581395 <NA>
## 115 OTU_5484 0.001188989 34.883721 Mamiellophyceae
## 116 OTU_5750 0.001108379 25.581395 Mamiellophyceae
## 117 OTU_599 0.001068075 48.837209 <NA>
## 118 OTU_6181 0.001229293 23.255814 <NA>
## 119 OTU_628 0.001229293 58.139535 Dinophyceae
## 120 OTU_6608 0.001108379 18.604651 <NA>
## 121 OTU_717 0.001833864 76.744186 Dinophyceae
## 122 OTU_751 0.010983032 37.209302 Prasinophyceae_clade-VII
## 123 OTU_758 0.001874169 65.116279 <NA>
## 124 OTU_793 0.001430817 60.465116 <NA>
## 125 OTU_825 0.001148684 44.186047 <NA>
## 126 OTU_884 0.001390512 76.744186 <NA>
## 127 OTU_893 0.002015235 55.813953 <NA>
## 128 OTU_896 0.002297368 74.418605 <NA>
## 129 OTU_914 0.001128532 58.139535 Prymnesiophyceae
## 130 OTU_922 0.001229293 58.139535 <NA>
## 131 OTU_933 0.004796260 90.697674 Prymnesiophyceae
## 132 OTU_938 0.001188989 67.441860 <NA>
## 133 OTU_949 0.002055540 65.116279 Dictyochophyceae
## 134 OTU_950 0.002317520 4.651163 Prymnesiophyceae
## 135 OTU_973 0.001934626 67.441860 <NA>
## 136 OTU_974 0.002680263 67.441860 Prymnesiophyceae
## 137 OTU_983 0.001269598 55.813953 Dictyochophyceae
## 138 OTU_990 0.001188989 67.441860 <NA>
## [1] 138 4
Proportion of regionally abundant OTUs (%):
## [1] 2.409218
Cosmopolitan OTUs (relative abundance over 0.1% and occurence in more than 80% of samples):
otu_tb18_ss1154_cosmop_sorted_prov<-cbind(otu_names=row.names(otu_tb18_ss1154_cosmop_sorted),otu_tb18_ss1154_cosmop_sorted)
## otu_names mean_rabund perc_occur classif
## 1 OTU_1383 0.003627423 88.37209 <NA>
## 2 OTU_2200 0.003728185 86.04651 <NA>
## 3 OTU_2269 0.001753255 86.04651 <NA>
## 4 OTU_2553 0.001974930 83.72093 <NA>
## 5 OTU_2648 0.001712950 83.72093 <NA>
## 6 OTU_3374 0.008826730 93.02326 Pelagophyceae
## 7 OTU_3677 0.013884970 81.39535 Prymnesiophyceae
## 8 OTU_38 0.017492241 88.37209 Pelagophyceae
## 9 OTU_380 0.002156302 83.72093 Prymnesiophyceae
## 10 OTU_933 0.004796260 90.69767 Prymnesiophyceae
## [1] 10 4
Number and proportion (%) of cosmopolitan OTUs:
## [1] 10
## [1] 0.174581
Number and proportion (%) of rare OTUs:
## [1] 0
## [1] 0
Let’s add the taxonomic classification by merging “tb18_tax_occur_ss1154_no_cero” with “tb18_tax”:
dim(tb18_ss1154_tax_sorted)
## [1] 5728 46
tb18_ss1154_tax_sorted[1:5,1:5]
## MP0311 MP1517 MP1672 MP2821 TARA_102_SUR_0d2_3
## OTU_1 0 0 0 0 0
## OTU_2 0 0 0 3 0
## OTU_3 0 0 0 0 0
## OTU_5 0 0 0 0 0
## OTU_6 0 1 0 0 0
Selection of phototrophs:
tb18_phototrophs <- tb18_ss1154_tax_sorted[which(tb18_ss1154_tax_sorted$classif != "NA"),]
dim(tb18_phototrophs)
## [1] 1905 46
#create a table per group and count in how many samples they occur.
Dinophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Dinophyceae"),]
Dinophyceae_tb[1:5,1:5]
## MP0311 MP1517 MP1672 MP2821 TARA_102_SUR_0d2_3
## OTU_14 0 0 0 0 0
## OTU_19 0 0 0 0 0
## OTU_29 0 0 0 0 0
## OTU_72 0 0 0 0 0
## OTU_74 0 0 0 0 0
Dinophyceae_tb_occur <- Dinophyceae_tb[,1:43]
Dinophyceae_tb_occur[1:5,1:5]
## MP0311 MP1517 MP1672 MP2821 TARA_102_SUR_0d2_3
## OTU_14 0 0 0 0 0
## OTU_19 0 0 0 0 0
## OTU_29 0 0 0 0 0
## OTU_72 0 0 0 0 0
## OTU_74 0 0 0 0 0
dim(Dinophyceae_tb_occur)
## [1] 1044 43
length(Dinophyceae_tb_occur[,colSums(Dinophyceae_tb_occur) > 0])
## [1] 43
#Dinophyceae_tb_samples <- Dinophyceae_tb_occur[,colSums(Dinophyceae_tb_occur) > 0]
#length(Dinophyceae_tb_samples[which(colSums(Dinophyceae_tb_occur) != 0)])
Prasinophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "other_Prasinophyceae"),]
Prasinophyceae_tb_occur <- Prasinophyceae_tb[,1:43]
length(Prasinophyceae_tb_occur[,colSums(Prasinophyceae_tb_occur) > 0])
## [1] 32
Chrysophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Chrysophyceae"),]
Chrysophyceae_tb_occur <- Chrysophyceae_tb[,1:43]
length(Chrysophyceae_tb_occur[,colSums(Chrysophyceae_tb_occur) > 0])
## [1] 41
Pelagophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Pelagophyceae"),]
Pelagophyceae_tb_occur <- Pelagophyceae_tb[,1:43]
length(Pelagophyceae_tb_occur[,colSums(Pelagophyceae_tb_occur) > 0])
## [1] 43
Dictyochophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Dictyochophyceae"),]
Dictyochophyceae_tb_occur <- Dictyochophyceae_tb[,1:43]
length(Dictyochophyceae_tb_occur[,colSums(Dictyochophyceae_tb_occur) > 0])
## [1] 43
Cryptomonadales_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Cryptophyceae"),]
Cryptomonadales_tb_occur <- Cryptomonadales_tb[,1:43]
length(Cryptomonadales_tb_occur[,colSums(Cryptomonadales_tb_occur) > 0])
## [1] 27
Bacillariophyta_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Bacillariophyceae"),]
Bacillariophyta_tb_occur <- Bacillariophyta_tb[,1:43]
length(Bacillariophyta_tb_occur[,colSums(Bacillariophyta_tb_occur) > 0])
## [1] 39
Chlorarachniophyta_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Chlorarachniophyceae"),]
Chlorarachniophyta_tb_occur <- Chlorarachniophyta_tb[,1:43]
length(Chlorarachniophyta_tb_occur[,colSums(Chlorarachniophyta_tb_occur) > 0])
## [1] 34
Bolidophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Bolidophyceae"),]
Bolidophyceae_tb_occur <- Bolidophyceae_tb[,1:43]
length(Bolidophyceae_tb_occur[,colSums(Bolidophyceae_tb_occur) > 0])
## [1] 35
Pinguiochysidales_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Pinguiophyceae"),]
Pinguiochysidales_tb_occur <- Pinguiochysidales_tb[,1:43]
length(Pinguiochysidales_tb_occur[,colSums(Pinguiochysidales_tb_occur) > 0])
## [1] 0
Prymnesiophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Prymnesiophyceae"),]
Prymnesiophyceae_tb_occur <- Prymnesiophyceae_tb[,1:43]
length(Prymnesiophyceae_tb_occur[,colSums(Prymnesiophyceae_tb_occur) > 0])
## [1] 43
Mamiellophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Mamiellophyceae"),]
Mamiellophyceae_tb_occur <- Mamiellophyceae_tb[,1:43]
length(Mamiellophyceae_tb_occur[,colSums(Mamiellophyceae_tb_occur) > 0])
## [1] 39
Eustigmatales_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Eustigmatophyceae"),]
Eustigmatales_tb_occur <- Eustigmatales_tb[,1:43]
length(Eustigmatales_tb_occur[,colSums(Eustigmatales_tb_occur) > 0])
## [1] 35
Chlorophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Chlorophyceae"),]
Chlorophyceae_tb_occur <- Chlorophyceae_tb[,1:43]
length(Chlorophyceae_tb_occur[,colSums(Chlorophyceae_tb_occur) > 0])
## [1] 18
Ulvophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Ulvophyceae"),]
Ulvophyceae_tb_occur <- Ulvophyceae_tb[,1:43]
length(Ulvophyceae_tb_occur[,colSums(Ulvophyceae_tb_occur) > 0])
## [1] 2
Raphydophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Raphydophyceae"),]
Raphydophyceae_tb_occur <- Raphydophyceae_tb[,1:43]
length(Raphydophyceae_tb_occur[,colSums(Raphydophyceae_tb_occur) > 0])
## [1] 0
Trebouxiophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Trebouxiophyceae"),]
Trebouxiophyceae_tb_occur <- Trebouxiophyceae_tb[,1:43]
length(Trebouxiophyceae_tb_occur[,colSums(Trebouxiophyceae_tb_occur) > 0])
## [1] 14
Phaeophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Phaeophyceae"),]
Phaeophyceae_tb_occur <- Phaeophyceae_tb[,1:43]
length(Phaeophyceae_tb_occur[,colSums(Phaeophyceae_tb_occur) > 0])
## [1] 2
Phaeothamniophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Phaeothamniophyceae"),]
Phaeothamniophyceae_tb_occur <- Phaeothamniophyceae_tb[,1:43]
length(Phaeothamniophyceae_tb_occur[,colSums(Phaeothamniophyceae_tb_occur) > 0])
## [1] 0
Xanthophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Xanthophyceae"),]
Xanthophyceae_tb_occur <- Xanthophyceae_tb[,1:43]
length(Xanthophyceae_tb_occur[,colSums(Xanthophyceae_tb_occur) > 0])
## [1] 5
Chlorodendrophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Chlorodendrophyceae"),]
Chlorodendrophyceae_tb_occur <- Chlorodendrophyceae_tb[,1:43]
length(Chlorodendrophyceae_tb_occur[,colSums(Chlorodendrophyceae_tb_occur) > 0])
## [1] 0
IncertaeSedis_Archaeplastida_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "IncertaeSedis_Archaeplastida"),]
IncertaeSedis_Archaeplastida_tb_occur <- IncertaeSedis_Archaeplastida_tb[,1:43]
length(IncertaeSedis_Archaeplastida_tb_occur[,colSums(IncertaeSedis_Archaeplastida_tb_occur) > 0])
## [1] 0
Nephroselmidophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Nephroselmidophyceae"),]
Nephroselmidophyceae_tb_occur <- Nephroselmidophyceae_tb[,1:43]
length(Nephroselmidophyceae_tb_occur[,colSums(Nephroselmidophyceae_tb_occur) > 0])
## [1] 12
Pavlovophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Pavlovophyceae"),]
Pavlovophyceae_tb_occur <- Pavlovophyceae_tb[,1:43]
length(Pavlovophyceae_tb_occur[,colSums(Pavlovophyceae_tb_occur) > 0])
## [1] 25
Rhodophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Rhodophyceae"),]
Rhodophyceae_tb_occur <- Rhodophyceae_tb[,1:43]
length(Rhodophyceae_tb_occur[,colSums(Rhodophyceae_tb_occur) > 0])
## [1] 4
Rappemonads_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Rappemonads"),]
Rappemonads_tb_occur <- Rappemonads_tb[,1:43]
length(Rappemonads_tb_occur[,colSums(Rappemonads_tb_occur) > 0])
## [1] 0
MOCH_1_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "MOCH-1"),]
MOCH_1_tb_occur <- MOCH_1_tb[,1:43]
length(MOCH_1_tb_occur[,colSums(MOCH_1_tb_occur) > 0])
## [1] 0
MOCH_2_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "MOCH-2"),]
MOCH_2_tb_occur <- MOCH_2_tb[,1:43]
length(MOCH_2_tb_occur[,colSums(MOCH_2_tb_occur) > 0])
## [1] 0
MOCH_5_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "MOCH-5"),]
MOCH_5_tb_occur <- MOCH_5_tb[,1:43]
length(MOCH_5_tb_occur[,colSums(MOCH_5_tb_occur) > 0])
## [1] 0
Prasinophyceae_clade_VII_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Prasinophyceae_clade-VII"),]
Prasinophyceae_clade_VII_tb_occur <- Prasinophyceae_clade_VII_tb[,1:43]
length(Prasinophyceae_clade_VII_tb_occur[,colSums(Prasinophyceae_clade_VII_tb_occur) > 0])
## [1] 36
Prasinophyceae_clade_IX_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Prasinophyceae_clade-IX"),]
Prasinophyceae_clade_IX_tb_occur <- Prasinophyceae_clade_IX_tb[,1:43]
length(Prasinophyceae_clade_IX_tb_occur[,colSums(Prasinophyceae_clade_IX_tb_occur) > 0])
## [1] 16
Pyramimonadaceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Pyramimonadaceae"),]
Pyramimonadaceae_tb_occur <- Pyramimonadaceae_tb[,1:43]
length(Pyramimonadaceae_tb_occur[,colSums(Pyramimonadaceae_tb_occur) > 0])
## [1] 21
## reads_per_class OTUs_per_class samples_per_class
## Dinophyceae 5453 1044 43
## Prymnesiophyceae 5180 165 43
## Mamiellophyceae 3173 102 39
## Pelagophyceae 2206 23 43
## Prasinophyceae_clade-VII 1367 30 36
## Dictyochophyceae 622 48 43
## Chrysophyceae 599 134 41
## Cryptophyceae 380 57 27
## Bacillariophyceae 287 125 39
## Chlorarachniophyceae 136 16 34
## Bolidophyceae 132 21 35
## Pyramimonadaceae 111 14 21
## other_Prasinophyceae 93 19 32
## Eustigmatophyceae 88 19 35
## Prasinophyceae_clade-IX 53 13 16
## Pavlovophyceae 51 5 25
## Trebouxiophyceae 39 30 14
## Chlorophyceae 25 20 18
## Nephroselmidophyceae 19 7 12
## Xanthophyceae 5 4 5
## Rhodophyceae 4 4 4
## Ulvophyceae 4 3 2
## Phaeophyceae 2 2 2
## reads_per_class OTUs_per_class samples_per_class
## 100.000 100.000 1416.279
## reads_per_class OTUs_per_class samples_per_class
## Dinophyceae 27.225522992 54.8031496 100.000000
## Prymnesiophyceae 25.862499376 8.6614173 100.000000
## Mamiellophyceae 15.842029058 5.3543307 90.697674
## Pelagophyceae 11.014029657 1.2073491 100.000000
## Prasinophyceae_clade-VII 6.825103600 1.5748031 83.720930
## Dictyochophyceae 3.105497029 2.5196850 100.000000
## Chrysophyceae 2.990663538 7.0341207 95.348837
## Cryptophyceae 1.897248989 2.9921260 62.790698
## Bacillariophyceae 1.432922263 6.5616798 90.697674
## Chlorarachniophyceae 0.679015428 0.8398950 79.069767
## Bolidophyceae 0.659044386 1.1023622 81.395349
## Pyramimonadaceae 0.554196415 0.7349081 48.837209
## other_Prasinophyceae 0.464326726 0.9973753 74.418605
## Eustigmatophyceae 0.439362924 0.9973753 81.395349
## Prasinophyceae_clade-IX 0.264616306 0.6824147 37.209302
## Pavlovophyceae 0.254630785 0.2624672 58.139535
## Trebouxiophyceae 0.194717659 1.5748031 32.558140
## Chlorophyceae 0.124819012 1.0498688 41.860465
## Nephroselmidophyceae 0.094862449 0.3674541 27.906977
## Xanthophyceae 0.024963802 0.2099738 11.627907
## Rhodophyceae 0.019971042 0.2099738 9.302326
## Ulvophyceae 0.019971042 0.1574803 4.651163
## Phaeophyceae 0.009985521 0.1049869 4.651163
Reads per class vs. OTUs per class:
Reads per class vs. samples in which they occurr:
Let’s add the taxonomic classification by merging “tb18_tax_occur_min1154_t” with “tb18_tax”:
dim(tb18_min1154_tax_sorted)
## [1] 8349 46
tb18_min1154_tax_sorted[1:5,1:5]
## TARA_102_SUR_0d2_3 TARA_109_SUR_0d2_3 TARA_110_SUR_0d2_3
## OTU_1 0 0 0
## OTU_2 0 0 0
## OTU_3 0 0 0
## OTU_4 0 0 0
## OTU_5 0 0 0
## TARA_111_SUR_0d2_3 TARA_112_SUR_0d2_3
## OTU_1 0 0
## OTU_2 0 0
## OTU_3 1 0
## OTU_4 0 0
## OTU_5 0 0
Selection of phototrophs:
## [1] 2882 46
#create a table per group and count in how many samples they occur.
Dinophyceae_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "Dinophyceae"),]
Dinophyceae_tb[1:5,1:5]
## TARA_102_SUR_0d2_3 TARA_109_SUR_0d2_3 TARA_110_SUR_0d2_3
## OTU_14 0 0 0
## OTU_19 0 0 0
## OTU_29 0 1 0
## OTU_34 0 0 0
## OTU_72 0 0 0
## TARA_111_SUR_0d2_3 TARA_112_SUR_0d2_3
## OTU_14 0 0
## OTU_19 0 0
## OTU_29 0 0
## OTU_34 0 0
## OTU_72 0 2
Dinophyceae_tb_occur <- Dinophyceae_tb[,1:43]
Dinophyceae_tb_occur[1:5,1:5]
## TARA_102_SUR_0d2_3 TARA_109_SUR_0d2_3 TARA_110_SUR_0d2_3
## OTU_14 0 0 0
## OTU_19 0 0 0
## OTU_29 0 1 0
## OTU_34 0 0 0
## OTU_72 0 0 0
## TARA_111_SUR_0d2_3 TARA_112_SUR_0d2_3
## OTU_14 0 0
## OTU_19 0 0
## OTU_29 0 0
## OTU_34 0 0
## OTU_72 0 2
dim(Dinophyceae_tb_occur)
## [1] 1476 43
length(Dinophyceae_tb_occur[,colSums(Dinophyceae_tb_occur) > 0])
## [1] 43
#Dinophyceae_tb_samples <- Dinophyceae_tb_occur[,colSums(Dinophyceae_tb_occur) > 0]
#length(Dinophyceae_tb_samples[which(colSums(Dinophyceae_tb_occur) != 0)])
Prasinophyceae_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "other_Prasinophyceae"),]
Prasinophyceae_tb_occur <- Prasinophyceae_tb[,1:43]
length(Prasinophyceae_tb_occur[,colSums(Prasinophyceae_tb_occur) > 0])
## [1] 41
Chrysophyceae_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "Chrysophyceae"),]
Chrysophyceae_tb_occur <- Chrysophyceae_tb[,1:43]
length(Chrysophyceae_tb_occur[,colSums(Chrysophyceae_tb_occur) > 0])
## [1] 43
Pelagophyceae_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "Pelagophyceae"),]
Pelagophyceae_tb_occur <- Pelagophyceae_tb[,1:43]
length(Pelagophyceae_tb_occur[,colSums(Pelagophyceae_tb_occur) > 0])
## [1] 43
Dictyochophyceae_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "Dictyochophyceae"),]
Dictyochophyceae_tb_occur <- Dictyochophyceae_tb[,1:43]
length(Dictyochophyceae_tb_occur[,colSums(Dictyochophyceae_tb_occur) > 0])
## [1] 43
Cryptomonadales_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "Cryptophyceae"),]
Cryptomonadales_tb_occur <- Cryptomonadales_tb[,1:43]
length(Cryptomonadales_tb_occur[,colSums(Cryptomonadales_tb_occur) > 0])
## [1] 39
Bacillariophyta_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "Bacillariophyceae"),]
Bacillariophyta_tb_occur <- Bacillariophyta_tb[,1:43]
length(Bacillariophyta_tb_occur[,colSums(Bacillariophyta_tb_occur) > 0])
## [1] 42
Chlorarachniophyta_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "Chlorarachniophyceae"),]
Chlorarachniophyta_tb_occur <- Chlorarachniophyta_tb[,1:43]
length(Chlorarachniophyta_tb_occur[,colSums(Chlorarachniophyta_tb_occur) > 0])
## [1] 41
Bolidophyceae_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "Bolidophyceae"),]
Bolidophyceae_tb_occur <- Bolidophyceae_tb[,1:43]
length(Bolidophyceae_tb_occur[,colSums(Bolidophyceae_tb_occur) > 0])
## [1] 43
Pinguiochysidales_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "Pinguiophyceae"),]
Pinguiochysidales_tb_occur <- Pinguiochysidales_tb[,1:43]
length(Pinguiochysidales_tb_occur[,colSums(Pinguiochysidales_tb_occur) > 0])
## [1] 0
Prymnesiophyceae_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "Prymnesiophyceae"),]
Prymnesiophyceae_tb_occur <- Prymnesiophyceae_tb[,1:43]
length(Prymnesiophyceae_tb_occur[,colSums(Prymnesiophyceae_tb_occur) > 0])
## [1] 43
Mamiellophyceae_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "Mamiellophyceae"),]
Mamiellophyceae_tb_occur <- Mamiellophyceae_tb[,1:43]
length(Mamiellophyceae_tb_occur[,colSums(Mamiellophyceae_tb_occur) > 0])
## [1] 41
Eustigmatales_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "Eustigmatophyceae"),]
Eustigmatales_tb_occur <- Eustigmatales_tb[,1:43]
length(Eustigmatales_tb_occur[,colSums(Eustigmatales_tb_occur) > 0])
## [1] 42
Chlorophyceae_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "Chlorophyceae"),]
Chlorophyceae_tb_occur <- Chlorophyceae_tb[,1:43]
length(Chlorophyceae_tb_occur[,colSums(Chlorophyceae_tb_occur) > 0])
## [1] 26
Ulvophyceae_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "Ulvophyceae"),]
Ulvophyceae_tb_occur <- Ulvophyceae_tb[,1:43]
length(Ulvophyceae_tb_occur[,colSums(Ulvophyceae_tb_occur) > 0])
## [1] 3
Raphydophyceae_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "Raphydophyceae"),]
Raphydophyceae_tb_occur <- Raphydophyceae_tb[,1:43]
length(Raphydophyceae_tb_occur[,colSums(Raphydophyceae_tb_occur) > 0])
## [1] 0
Trebouxiophyceae_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "Trebouxiophyceae"),]
Trebouxiophyceae_tb_occur <- Trebouxiophyceae_tb[,1:43]
length(Trebouxiophyceae_tb_occur[,colSums(Trebouxiophyceae_tb_occur) > 0])
## [1] 26
Phaeophyceae_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "Phaeophyceae"),]
Phaeophyceae_tb_occur <- Phaeophyceae_tb[,1:43]
length(Phaeophyceae_tb_occur[,colSums(Phaeophyceae_tb_occur) > 0])
## [1] 4
Phaeothamniophyceae_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "Phaeothamniophyceae"),]
Phaeothamniophyceae_tb_occur <- Phaeothamniophyceae_tb[,1:43]
length(Phaeothamniophyceae_tb_occur[,colSums(Phaeothamniophyceae_tb_occur) > 0])
## [1] 1
Xanthophyceae_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "Xanthophyceae"),]
Xanthophyceae_tb_occur <- Xanthophyceae_tb[,1:43]
length(Xanthophyceae_tb_occur[,colSums(Xanthophyceae_tb_occur) > 0])
## [1] 14
Chlorodendrophyceae_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "Chlorodendrophyceae"),]
Chlorodendrophyceae_tb_occur <- Chlorodendrophyceae_tb[,1:43]
length(Chlorodendrophyceae_tb_occur[,colSums(Chlorodendrophyceae_tb_occur) > 0])
## [1] 0
IncertaeSedis_Archaeplastida_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "IncertaeSedis_Archaeplastida"),]
IncertaeSedis_Archaeplastida_tb_occur <- IncertaeSedis_Archaeplastida_tb[,1:43]
length(IncertaeSedis_Archaeplastida_tb_occur[,colSums(IncertaeSedis_Archaeplastida_tb_occur) > 0])
## [1] 1
Nephroselmidophyceae_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "Nephroselmidophyceae"),]
Nephroselmidophyceae_tb_occur <- Nephroselmidophyceae_tb[,1:43]
length(Nephroselmidophyceae_tb_occur[,colSums(Nephroselmidophyceae_tb_occur) > 0])
## [1] 19
Pavlovophyceae_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "Pavlovophyceae"),]
Pavlovophyceae_tb_occur <- Pavlovophyceae_tb[,1:43]
length(Pavlovophyceae_tb_occur[,colSums(Pavlovophyceae_tb_occur) > 0])
## [1] 37
Rhodophyceae_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "Rhodophyceae"),]
Rhodophyceae_tb_occur <- Rhodophyceae_tb[,1:43]
length(Rhodophyceae_tb_occur[,colSums(Rhodophyceae_tb_occur) > 0])
## [1] 13
Rappemonads_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "Rappemonads"),]
Rappemonads_tb_occur <- Rappemonads_tb[,1:43]
length(Rappemonads_tb_occur[,colSums(Rappemonads_tb_occur) > 0])
## [1] 0
MOCH_1_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "MOCH-1"),]
MOCH_1_tb_occur <- MOCH_1_tb[,1:43]
length(MOCH_1_tb_occur[,colSums(MOCH_1_tb_occur) > 0])
## [1] 0
MOCH_2_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "MOCH-2"),]
MOCH_2_tb_occur <- MOCH_2_tb[,1:43]
length(MOCH_2_tb_occur[,colSums(MOCH_2_tb_occur) > 0])
## [1] 0
MOCH_5_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "MOCH-5"),]
MOCH_5_tb_occur <- MOCH_5_tb[,1:43]
length(MOCH_5_tb_occur[,colSums(MOCH_5_tb_occur) > 0])
## [1] 0
Prasinophyceae_clade_VII_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "Prasinophyceae_clade-VII"),]
Prasinophyceae_clade_VII_tb_occur <- Prasinophyceae_clade_VII_tb[,1:43]
length(Prasinophyceae_clade_VII_tb_occur[,colSums(Prasinophyceae_clade_VII_tb_occur) > 0])
## [1] 42
Prasinophyceae_clade_IX_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "Prasinophyceae_clade-IX"),]
Prasinophyceae_clade_IX_tb_occur <- Prasinophyceae_clade_IX_tb[,1:43]
length(Prasinophyceae_clade_IX_tb_occur[,colSums(Prasinophyceae_clade_IX_tb_occur) > 0])
## [1] 29
Pyramimonadaceae_tb <- tb18_phototrophs_non.norm[which(tb18_phototrophs_non.norm$classif == "Pyramimonadaceae"),]
Pyramimonadaceae_tb_occur <- Pyramimonadaceae_tb[,1:43]
length(Pyramimonadaceae_tb_occur[,colSums(Pyramimonadaceae_tb_occur) > 0])
## [1] 33
## reads_per_class OTUs_per_class
## Bacillariophyceae 2664 374
## Bolidophyceae 509 25
## Chlorarachniophyceae 327 20
## Chlorophyceae 79 45
## Chrysophyceae 2824 240
## reads_per_class OTUs_per_class
## Prymnesiophyceae 34758 177
## Dinophyceae 23637 1476
## Mamiellophyceae 15846 117
## Pelagophyceae 9306 25
## Prasinophyceae_clade-VII 3575 37
## Chrysophyceae 2824 240
## Dictyochophyceae 2819 52
## Bacillariophyceae 2664 374
## Cryptophyceae 1922 95
## Bolidophyceae 509 25
## Pyramimonadaceae 507 19
## other_Prasinophyceae 492 26
## Eustigmatophyceae 359 27
## Chlorarachniophyceae 327 20
## Pavlovophyceae 228 13
## Prasinophyceae_clade-IX 155 16
## Trebouxiophyceae 121 57
## Chlorophyceae 79 45
## Nephroselmidophyceae 64 7
## Xanthophyceae 21 11
## Rhodophyceae 16 12
## Ulvophyceae 6 5
## Phaeophyceae 5 4
## IncertaeSedis_Archaeplastida 1 1
## Phaeothamniophyceae 1 1
## samples_per_class
## Prymnesiophyceae 43
## Dinophyceae 43
## Mamiellophyceae 41
## Pelagophyceae 43
## Prasinophyceae_clade-VII 42
## Chrysophyceae 43
## Dictyochophyceae 43
## Bacillariophyceae 42
## Cryptophyceae 39
## Bolidophyceae 42
## Pyramimonadaceae 33
## other_Prasinophyceae 41
## Eustigmatophyceae 42
## Chlorarachniophyceae 41
## Pavlovophyceae 37
## Prasinophyceae_clade-IX 29
## Trebouxiophyceae 26
## Chlorophyceae 26
## Nephroselmidophyceae 19
## Xanthophyceae 14
## Rhodophyceae 13
## Ulvophyceae 3
## Phaeophyceae 4
## IncertaeSedis_Archaeplastida 2
## Phaeothamniophyceae 1
## reads_per_class OTUs_per_class samples_per_class
## 100.000 100.000 1748.837
## reads_per_class OTUs_per_class
## Prymnesiophyceae 3.467409e+01 6.14156836
## Dinophyceae 2.357994e+01 51.21443442
## Mamiellophyceae 1.580775e+01 4.05968078
## Pelagophyceae 9.283534e+00 0.86745316
## Prasinophyceae_clade-VII 3.566369e+00 1.28383067
## Chrysophyceae 2.817182e+00 8.32755031
## Dictyochophyceae 2.812194e+00 1.80430257
## Bacillariophyceae 2.657569e+00 12.97709924
## Cryptophyceae 1.917360e+00 3.29632200
## Bolidophyceae 5.077712e-01 0.86745316
## Pyramimonadaceae 5.057760e-01 0.65926440
## other_Prasinophyceae 4.908122e-01 0.90215128
## Eustigmatophyceae 3.581333e-01 0.93684941
## Chlorarachniophyceae 3.262106e-01 0.69396253
## Pavlovophyceae 2.274496e-01 0.45107564
## Prasinophyceae_clade-IX 1.546258e-01 0.55517002
## Trebouxiophyceae 1.207079e-01 1.97779320
## Chlorophyceae 7.880928e-02 1.56141568
## Nephroselmidophyceae 6.384549e-02 0.24288688
## Xanthophyceae 2.094930e-02 0.38167939
## Rhodophyceae 1.596137e-02 0.41637752
## Ulvophyceae 5.985515e-03 0.17349063
## Phaeophyceae 4.987929e-03 0.13879251
## IncertaeSedis_Archaeplastida 9.975858e-04 0.03469813
## Phaeothamniophyceae 9.975858e-04 0.03469813
## samples_per_class
## Prymnesiophyceae 100.000000
## Dinophyceae 100.000000
## Mamiellophyceae 95.348837
## Pelagophyceae 100.000000
## Prasinophyceae_clade-VII 97.674419
## Chrysophyceae 100.000000
## Dictyochophyceae 100.000000
## Bacillariophyceae 97.674419
## Cryptophyceae 90.697674
## Bolidophyceae 97.674419
## Pyramimonadaceae 76.744186
## other_Prasinophyceae 95.348837
## Eustigmatophyceae 97.674419
## Chlorarachniophyceae 95.348837
## Pavlovophyceae 86.046512
## Prasinophyceae_clade-IX 67.441860
## Trebouxiophyceae 60.465116
## Chlorophyceae 60.465116
## Nephroselmidophyceae 44.186047
## Xanthophyceae 32.558140
## Rhodophyceae 30.232558
## Ulvophyceae 6.976744
## Phaeophyceae 9.302326
## IncertaeSedis_Archaeplastida 4.651163
## Phaeothamniophyceae 2.325581
Reads per class vs. OTUs per class:
Reads OTUs per class vs. samples in which they occurr:
Let’s read the dataset and remove the samples containing less than 36155 reads:
Table dimensions and content outline:
## [1] 28294 43
## TARA_102_SUR_0d2_3 TARA_109_SUR_0d2_3 TARA_110_SUR_0d2_3
## OTU_1 0 1 0
## OTU_2 0 0 0
## OTU_3 0 0 0
## OTU_4 2 3 1
## OTU_5 0 0 0
## TARA_111_SUR_0d2_3 TARA_112_SUR_0d2_3
## OTU_1 0 1
## OTU_2 0 0
## OTU_3 0 0
## OTU_4 5 2
## OTU_5 0 0
Minimum number of reads per station:
min(colSums(tb16_tax_occur_min36155))
## [1] 36155
Maximum number of reads per station:
max(colSums(tb16_tax_occur_min36155))
## [1] 158933
Identification of stations with higher number of reads:
amplicons_per_sample<-colSums(tb16_tax_occur_min36155)
amplicons_per_sample[which(colSums(tb16_tax_occur_min36155)>150000)]
## TARA_64_SUR_0d2_3 TARA_85_SUR_0d2_3
## 158933 150654
Overall reads per sample:
Let’s normalize the original dataset by randomly subsampling 36155 reads in each station:
tb16_tax_occur_min36155_t<-t(tb16_tax_occur_min36155)
tb16_tax_occur_ss36155<-rrarefy(tb16_tax_occur_min36155_t, 36155)
The normalized table shows the following dimensions and format:
## [1] 43 28294
## OTU_1 OTU_2 OTU_3 OTU_4 OTU_5
## TARA_102_SUR_0d2_3 0 0 0 1 0
## TARA_109_SUR_0d2_3 1 0 0 2 0
## TARA_110_SUR_0d2_3 0 0 0 1 0
## TARA_111_SUR_0d2_3 0 0 0 1 0
## TARA_112_SUR_0d2_3 0 0 0 0 0
Its content fits with the expected normalization values (36155 reads per station):
rowSums(tb16_tax_occur_ss36155)
## TARA_102_SUR_0d2_3 TARA_109_SUR_0d2_3 TARA_110_SUR_0d2_3
## 36155 36155 36155
## TARA_111_SUR_0d2_3 TARA_112_SUR_0d2_3 TARA_122_SUR_0d2_3
## 36155 36155 36155
## TARA_123_SUR_0d2_3 TARA_124_SUR_0d2_3 TARA_125_SUR_0d2_3
## 36155 36155 36155
## TARA_128_SUR_0d2_3 TARA_132_SUR_0d2_3 TARA_133_SUR_0d2_3
## 36155 36155 36155
## TARA_137_SUR_0d2_3 TARA_138_SUR_0d2_3 TARA_140_SUR_0d2_3
## 36155 36155 36155
## TARA_142_SUR_0d2_3 TARA_145_SUR_0d2_3 TARA_146_SUR_0d2_3
## 36155 36155 36155
## TARA_148_SUR_0d2_3 TARA_149_SUR_0d2_3 TARA_150_SUR_0d2_3
## 36155 36155 36155
## TARA_151_SUR_0d2_3 TARA_152_SUR_0d2_3 TARA_56_SUR_0d2_3
## 36155 36155 36155
## TARA_57_SUR_0d2_3 TARA_62_SUR_0d2_3 TARA_64_SUR_0d2_3
## 36155 36155 36155
## TARA_65_SUR_0d2_3 TARA_66_SUR_0d2_3 TARA_68_SUR_0d2_3
## 36155 36155 36155
## TARA_72_SUR_0d2_3 TARA_76_SUR_0d2_3 TARA_78_SUR_0d2_3
## 36155 36155 36155
## TARA_82_SUR_0d2_3 TARA_84_SUR_0d2_3 TARA_85_SUR_0d2_3
## 36155 36155 36155
## TARA_96_SUR_0d2_3 TARA_98_SUR_0d2_3 TARA_99_SUR_0d2_3
## 36155 36155 36155
## MP0311 MP1517 MP1672
## 36155 36155 36155
## MP2821
## 36155
Let’s check out how many OTUs don’t appear in the new table:
length(which(colSums(tb16_tax_occur_ss36155)==0))
## [1] 8568
There are 8045 OTUs that don’t show any occurrence in the normalized data. Let’s remove them from the table and take a look at its final dimensions:
tb16_tax_occur_ss36155_no_cero<-tb16_tax_occur_ss36155[,-(which(colSums(tb16_tax_occur_ss36155)==0))]
tb16_tax_occur_ss36155_no_cero<-tb16_tax_occur_ss36155_no_cero[mixedorder(row.names(tb16_tax_occur_ss36155_no_cero)),]
dim(tb16_tax_occur_ss36155_no_cero)
## [1] 43 19726
Datasets summary:
dim(tb16_tax)
## [1] 28294 58
dim(tb16_tax_occur)
## [1] 28294 55
dim(tb16_tax_occur_ss36155_no_cero)
## [1] 43 19726
#28294 58
#28294 55
#43 19726
Most of the samples take Shannon Index values around 6:
Lowest number of OTUs per sample:
## [1] 2503
Maximum number of OTUs per sample:
## [1] 5092
In most of the samples, we can identify between 600 and 700 OTUs:
plot(sort(OTUs_per_sample_16S_tax_occur_ss36155), pch=19)
boxplot(OTUs_per_sample_16S_tax_occur_ss36155, pch=19)
pielou_evenness_16S_tax_occur_ss36155<-tb16_tax_occur_ss36155_div/log(OTUs_per_sample_16S_tax_occur_ss36155)
The Pielou index (constrained between 0 and 1) takes values closer to 1 as the variation of species proportion in a sample increases. Most of the samples get values around 0.95, meaning that the numerical composition of different OTUs in a sample is highly variable - there’s no constant presence of dominant species.
The less variation in communities between the species (and the presence of a dominant specie), the lower J’ is.
plot(sort(pielou_evenness_16S_tax_occur_ss36155), pch=19)
boxplot(pielou_evenness_16S_tax_occur_ss36155, pch=19)
The OTU_38, with 874 reads, is the most abundant in the overall dataset:
head(sort(colSums(tb16_tax_occur_ss36155_no_cero), decreasing=T), n=10L)
## OTU_1754 OTU_2893 OTU_761 OTU_2428 OTU_3736 OTU_1942 OTU_2466 OTU_3271
## 17121 13628 13145 12010 11106 10244 10043 8626
## OTU_5398 OTU_5330
## 8526 7736
Most of the OTUs show very few occurrences; suggesting that we will probably be able to identify a significant ammount of rare otus:
plot(log(sort(colSums(tb16_tax_occur_ss36155_no_cero), decreasing=T)), pch=19)
The OTUs abundance distribution fits relativelly close to log-normal model.
According to Preston’s lognormal model fit into species frequencies groups, we’re missing 1588.816 species:
tb16_tax_occur_ss36155_prestonfit<-prestonfit(colSums(tb16_tax_occur_min36155_t))
plot(tb16_tax_occur_ss36155_prestonfit, main="Pooled species")
veiledspec(tb16_tax_occur_ss36155_prestonfit)
## Extrapolated Observed Veiled
## 108452.1 24691.0 83761.1
When computing Prestons’ lognormal model fit without pooling data into groups, we seem to miss 1385.371 species:
## Extrapolated Observed Veiled
## 75267.64 24691.00 50576.64
rarec_input<-t(as.matrix(colSums(tb16_tax_occur_ss36155_no_cero)))
#tb16_rarecurve_step1000_10000<-rarecurve(rarec_input, step = 1000, 10000, xlab = "Sample size", ylab = "OTUs", label = TRUE, main="16S amplicons diversity step=1000 & ss=10000\n(40,816 OTUs; 5,247,375 reads)\n")
#tb16_rarecurve_step1000_10000
tb16_rarecurve_step100_5700<-rarecurve(rarec_input, step = 1000, 19700, xlab = "Sample size", ylab = "OTUs", label = TRUE, main="16S amplicons diversity step=100 & ss=5700\n(19,756 OTUs; 1,554,665 reads)\n")
#without rarefying
rarec_allOTUs_input<-t(as.matrix(colSums(t(tb16_tax_occur))))
tb16_rarecurve_allOTUs_step100_8642<-rarecurve(rarec_allOTUs_input, step = 1000, 28200, xlab = "Sample size", ylab = "OTUs", label = TRUE, main="16S amplicons diversity non-rarefied step=1000 & ss=100000\n(28,294 OTUs; 4,285,523 reads)\n")
The Bray-Curtis dissimilarity, constrained between 0 (minimum distance) and 1 (highest dissimilarity) allows us to quantify the differences between samples according to the composition and relative abundance of their OTUs. In our dataset, most of the samples pairs take dissimilarity values around 0.8, meaning that their composition is substantially different.
The stations seem to form clusters according to geographic localization, but there are no evident clusters separated from the general groups, apart from the one containing TARA_84, TARA_82 and TARA_85.
(To be done: assign Longhurst provinces information to each station and check if any of the central clusters is meaningful regarding to the samples’ geographical location)
We can identify a prominent group in the central part of the NMDS plot and a few outliers (TARA 82, 84 and 85) in the central-right edge of the plot. The stress parameter takes a value below 0.2, suggesting that the plot is acceptable.
##
## Call:
## monoMDS(dist = tb16_tax_occur_ss36155_no_cero.bray)
##
## Non-metric Multidimensional Scaling
##
## 43 points, dissimilarity 'bray', call 'vegdist(x = tb16_tax_occur_ss36155_no_cero, method = "bray")'
##
## Dimensions: 2
## Stress: 9.589501e-05
## Stress type 1, weak ties
## Scores scaled to unit root mean square, rotated to principal components
## Stopped after 62 iterations: Stress nearly zero (< smin)
When implementing a most robut function for computing NMDS plots, the result is quiet the same:
## Run 0 stress 9.301068e-05
## Run 1 stress 9.158128e-05
## ... New best solution
## ... Procrustes: rmse 5.375076e-05 max resid 0.0002756587
## ... Similar to previous best
## Run 2 stress 9.428448e-05
## ... Procrustes: rmse 2.781509e-05 max resid 0.0001387135
## ... Similar to previous best
## Run 3 stress 9.332153e-05
## ... Procrustes: rmse 0.0001081792 max resid 0.0003003597
## ... Similar to previous best
## Run 4 stress 9.620788e-05
## ... Procrustes: rmse 7.880078e-05 max resid 0.0002958135
## ... Similar to previous best
## Run 5 stress 7.292808e-05
## ... New best solution
## ... Procrustes: rmse 8.029629e-05 max resid 0.0002395264
## ... Similar to previous best
## Run 6 stress 9.457543e-05
## ... Procrustes: rmse 9.02236e-05 max resid 0.0002043675
## ... Similar to previous best
## Run 7 stress 9.195291e-05
## ... Procrustes: rmse 7.181292e-05 max resid 0.0001653436
## ... Similar to previous best
## Run 8 stress 9.669817e-05
## ... Procrustes: rmse 8.197419e-05 max resid 0.0002534159
## ... Similar to previous best
## Run 9 stress 9.961385e-05
## ... Procrustes: rmse 5.636359e-05 max resid 0.0001619826
## ... Similar to previous best
## Run 10 stress 9.64433e-05
## ... Procrustes: rmse 8.938231e-05 max resid 0.0002629114
## ... Similar to previous best
## Run 11 stress 7.614704e-05
## ... Procrustes: rmse 7.66202e-05 max resid 0.0002242664
## ... Similar to previous best
## Run 12 stress 9.185541e-05
## ... Procrustes: rmse 5.891344e-05 max resid 0.0002376396
## ... Similar to previous best
## Run 13 stress 7.979814e-05
## ... Procrustes: rmse 7.351333e-05 max resid 0.0001877524
## ... Similar to previous best
## Run 14 stress 9.896201e-05
## ... Procrustes: rmse 9.046256e-05 max resid 0.0002714003
## ... Similar to previous best
## Run 15 stress 9.814128e-05
## ... Procrustes: rmse 2.413431e-05 max resid 7.570832e-05
## ... Similar to previous best
## Run 16 stress 9.22882e-05
## ... Procrustes: rmse 8.985092e-05 max resid 0.0002411534
## ... Similar to previous best
## Run 17 stress 9.977628e-05
## ... Procrustes: rmse 9.592451e-05 max resid 0.000260223
## ... Similar to previous best
## Run 18 stress 9.807055e-05
## ... Procrustes: rmse 8.39958e-05 max resid 0.00019424
## ... Similar to previous best
## Run 19 stress 9.558017e-05
## ... Procrustes: rmse 3.817104e-05 max resid 0.0001220175
## ... Similar to previous best
## Run 20 stress 9.839229e-05
## ... Procrustes: rmse 4.602517e-05 max resid 0.0001242549
## ... Similar to previous best
## *** Solution reached
## Warning in metaMDS(tb16_tax_occur_ss36155_no_cero.bray): Stress is (nearly)
## zero - you may have insufficient data
## Warning in ordiplot(x, choices = choices, type = type, display = display, :
## Species scores not available
## Warning in if (class(lats) == "SpatialPoints") lats <- coordinates(lats):
## the condition has length > 1 and only the first element will be used
Working datasets:
dim(tb16_tax_occur_ss36155_no_cero)
## [1] 43 19763
tb16_tax_occur_ss36155_no_cero[1:5, 1:5]
## OTU_1 OTU_3 OTU_4 OTU_5 OTU_7
## MP0311 0 0 0 0 0
## MP1517 0 0 3 0 1
## MP1672 0 0 0 0 0
## MP2821 1 0 0 0 1
## TARA_102_SUR_0d2_3 0 0 1 0 0
tb16_tax_occur_ss36155_no_cero.bray<-as.matrix(tb16_tax_occur_ss36155_no_cero.bray)
## [1] 43 43
dim(geo_distances_MP_16S)
## [1] 43 43
Communities quickly change their composition across geographical distances:
plot(geo_distances_MP_16S, tb16_tax_occur_ss36155_no_cero.bray, pch=19, cex=0.4, xlab="Geopgraphical distances", ylab="Bray-Curtis dissimilarities")
Mantel statistic is -significantlly- so low, meaning that the correlation between samples dissimilarity and geographical distances is weak.
mantel(geo_distances_MP_16S, tb16_tax_occur_ss36155_no_cero.bray)
##
## Mantel statistic based on Pearson's product-moment correlation
##
## Call:
## mantel(xdis = geo_distances_MP_16S, ydis = tb16_tax_occur_ss36155_no_cero.bray)
##
## Mantel statistic r: 0.02826
## Significance: 0.279
##
## Upper quantiles of permutations (null model):
## 90% 95% 97.5% 99%
## 0.0873 0.1167 0.1444 0.1697
## Permutation: free
## Number of permutations: 999
Maximum distance between samples:
## [1] 19543.94
Minimum distance between samples:
## [1] 0
Correlograms:
MP_16s_ss36155_mantel_correl_by_1000km<-mantel.correlog(tb16_tax_occur_ss36155_no_cero.bray, D.geo=geo_distances_MP_16S, break.pts=seq(0,20000, by=1000))
plot(MP_16s_ss36155_mantel_correl_by_1000km)
MP_16s_ss36155_mantel_correl_by_100km<-mantel.correlog(tb16_tax_occur_ss36155_no_cero.bray, D.geo=geo_distances_MP_16S, break.pts=seq(0,20000, by=100))
plot(MP_16s_ss36155_mantel_correl_by_100km)
In the following plot, we can appreciate the OTUs distribution according to their percentage of occurence and relative abundance. The red line keeps up OTUs that occur in more than 80% of the samples, the green line limits regionally rare OTUs (< 0.001%), and the blue one restricts regionally abundant OTUs (> 0.1%).
Regionally abundant OTUs (relative abundance over 0.1%):
tb16_class_prov<-cbind(otu_names=row.names(tb16_class),tb16_class)
## otu_names mean_rabund perc_occur class_B
## 1 OTU_10747 0.001144941 55.81395 other_bacteria
## 2 OTU_12129 0.001116639 65.11628 Synechococcus
## 3 OTU_1329 0.002241640 93.02326 other_bacteria
## 4 OTU_1676 0.002044814 88.37209 Prochlorococcus
## 5 OTU_1738 0.001929676 93.02326 other_bacteria
## 6 OTU_1754 0.011149026 93.02326 Prochlorococcus
## 7 OTU_1942 0.006453480 93.02326 Prochlorococcus
## 8 OTU_223 0.001027231 93.02326 other_bacteria
## 9 OTU_23 0.001443398 93.02326 other_bacteria
## 10 OTU_2428 0.007645377 93.02326 other_bacteria
## 11 OTU_2466 0.006429681 93.02326 Prochlorococcus
## 12 OTU_2789 0.001014366 100.00000 other_bacteria
## 13 OTU_27900 0.002379934 100.00000 Prymnesiophyceae
## 14 OTU_27901 0.003573117 81.39535 Prymnesiophyceae
## 15 OTU_27912 0.001820971 46.51163 Prymnesiophyceae
## 16 OTU_27927 0.003777663 48.83721 Mamiellophyceae
## 17 OTU_27934 0.001615139 95.34884 Prymnesiophyceae
## 18 OTU_27938 0.001353989 93.02326 Pelagophyceae
## 19 OTU_28017 0.001251717 79.06977 Pelagophyceae
## 20 OTU_28046 0.001222771 55.81395 Mamiellophyceae
## 21 OTU_28060 0.001001502 41.86047 Mamiellophyceae
## 22 OTU_28061 0.002150302 51.16279 Mamiellophyceae
## 23 OTU_2866 0.003430321 90.69767 other_bacteria
## 24 OTU_2877 0.002066683 95.34884 other_bacteria
## 25 OTU_2893 0.008587059 93.02326 other_bacteria
## 26 OTU_29 0.002464840 100.00000 other_bacteria
## 27 OTU_2980 0.001006648 100.00000 other_bacteria
## 28 OTU_3004 0.002071829 93.02326 other_bacteria
## 29 OTU_3023 0.001958621 93.02326 other_bacteria
## 30 OTU_3162 0.002286023 95.34884 other_bacteria
## 31 OTU_3172 0.002888082 88.37209 other_bacteria
## 32 OTU_3173 0.001013080 83.72093 other_bacteria
## 33 OTU_3187 0.003630364 100.00000 other_bacteria
## 34 OTU_3197 0.001815825 81.39535 other_bacteria
## 35 OTU_3198 0.001436966 93.02326 other_bacteria
## 36 OTU_3207 0.001030447 90.69767 other_bacteria
## 37 OTU_3214 0.001204761 93.02326 other_bacteria
## 38 OTU_3240 0.001783021 93.02326 other_bacteria
## 39 OTU_3259 0.002529805 58.13953 other_bacteria
## 40 OTU_3271 0.005586413 90.69767 Prochlorococcus
## 41 OTU_3308 0.002082121 97.67442 Synechococcus
## 42 OTU_3311 0.002843056 90.69767 Prochlorococcus
## 43 OTU_3326 0.001315396 90.69767 other_bacteria
## 44 OTU_333 0.001588123 100.00000 other_bacteria
## 45 OTU_3347 0.001297386 97.67442 other_bacteria
## 46 OTU_3680 0.001653732 93.02326 other_bacteria
## 47 OTU_3731 0.001496142 100.00000 other_bacteria
## 48 OTU_3736 0.007181611 93.02326 Prochlorococcus
## 49 OTU_3789 0.001064538 100.00000 other_bacteria
## 50 OTU_3803 0.001385507 100.00000 other_bacteria
## 51 OTU_3810 0.001757292 93.02326 other_bacteria
## 52 OTU_4036 0.001322471 97.67442 other_bacteria
## 53 OTU_4048 0.002347129 100.00000 other_bacteria
## 54 OTU_4083 0.001072257 95.34884 other_bacteria
## 55 OTU_4366 0.001663381 100.00000 other_bacteria
## 56 OTU_4560 0.001658878 100.00000 other_bacteria
## 57 OTU_4664 0.001741211 100.00000 other_bacteria
## 58 OTU_4926 0.001751503 93.02326 other_bacteria
## 59 OTU_4991 0.001551460 95.34884 other_bacteria
## 60 OTU_520 0.001058749 93.02326 other_bacteria
## 61 OTU_5211 0.001081262 95.34884 other_bacteria
## 62 OTU_5222 0.001090910 93.02326 other_bacteria
## 63 OTU_5266 0.001177102 93.02326 other_bacteria
## 64 OTU_5268 0.001193826 100.00000 other_bacteria
## 65 OTU_5288 0.001213123 90.69767 Prochlorococcus
## 66 OTU_5301 0.001794599 95.34884 other_bacteria
## 67 OTU_5327 0.001590696 93.02326 other_bacteria
## 68 OTU_5330 0.004964414 93.02326 Prochlorococcus
## 69 OTU_5352 0.003766728 58.13953 other_bacteria
## 70 OTU_5355 0.001160379 100.00000 other_bacteria
## 71 OTU_5398 0.005423033 90.69767 Prochlorococcus
## 72 OTU_5403 0.002407593 90.69767 other_bacteria
## 73 OTU_5414 0.001204118 100.00000 other_bacteria
## 74 OTU_542 0.001498072 93.02326 other_bacteria
## 75 OTU_5428 0.001070970 100.00000 other_bacteria
## 76 OTU_5432 0.002228776 93.02326 other_bacteria
## 77 OTU_5441 0.002136152 93.02326 other_bacteria
## 78 OTU_5445 0.001281948 100.00000 other_bacteria
## 79 OTU_5447 0.002478991 100.00000 other_bacteria
## 80 OTU_5449 0.001256219 93.02326 other_bacteria
## 81 OTU_5491 0.001247857 100.00000 other_bacteria
## 82 OTU_5493 0.001173886 95.34884 other_bacteria
## 83 OTU_5499 0.001057463 93.02326 other_bacteria
## 84 OTU_5500 0.001371357 100.00000 other_bacteria
## 85 OTU_5511 0.001293526 93.02326 other_bacteria
## 86 OTU_5519 0.001240782 48.83721 other_bacteria
## 87 OTU_5523 0.001213123 100.00000 other_bacteria
## 88 OTU_5564 0.001316682 93.02326 other_bacteria
## 89 OTU_5586 0.001207334 95.34884 other_bacteria
## 90 OTU_5607 0.001467197 93.02326 other_bacteria
## 91 OTU_5624 0.001290953 95.34884 other_bacteria
## 92 OTU_5633 0.002238424 95.34884 other_bacteria
## 93 OTU_5635 0.002038381 100.00000 other_bacteria
## 94 OTU_5651 0.001368140 93.02326 other_bacteria
## 95 OTU_5653 0.001828047 93.02326 other_bacteria
## 96 OTU_5662 0.001409950 90.69767 other_bacteria
## 97 OTU_5665 0.001334693 93.02326 other_bacteria
## 98 OTU_5667 0.001355276 93.02326 other_bacteria
## 99 OTU_5691 0.001470413 93.02326 other_bacteria
## 100 OTU_5727 0.002715698 93.02326 other_bacteria
## 101 OTU_5741 0.001373929 100.00000 other_bacteria
## 102 OTU_5742 0.001326974 93.02326 other_bacteria
## 103 OTU_5752 0.001840911 90.69767 Prochlorococcus
## 104 OTU_5772 0.001898801 100.00000 other_bacteria
## 105 OTU_5786 0.001188681 83.72093 Synechococcus
## 106 OTU_5800 0.001009864 93.02326 other_bacteria
## 107 OTU_5804 0.001425387 93.02326 other_bacteria
## 108 OTU_5815 0.001025301 100.00000 other_bacteria
## 109 OTU_5817 0.001042025 100.00000 other_bacteria
## 110 OTU_5821 0.001091553 93.02326 other_bacteria
## 111 OTU_5823 0.001253003 93.02326 other_bacteria
## 112 OTU_5879 0.001743784 93.02326 other_bacteria
## 113 OTU_5880 0.001172600 90.69767 other_bacteria
## 114 OTU_5911 0.001307034 100.00000 other_bacteria
## 115 OTU_5925 0.001328260 86.04651 other_bacteria
## 116 OTU_5942 0.001259435 100.00000 other_bacteria
## 117 OTU_5961 0.001958621 100.00000 other_bacteria
## 118 OTU_5968 0.001397086 100.00000 other_bacteria
## 119 OTU_5976 0.001164881 93.02326 other_bacteria
## 120 OTU_5978 0.001993355 100.00000 other_bacteria
## 121 OTU_5982 0.001111493 93.02326 other_bacteria
## 122 OTU_5985 0.001362351 100.00000 other_bacteria
## 123 OTU_5988 0.001011150 93.02326 other_bacteria
## 124 OTU_6047 0.001099915 100.00000 other_bacteria
## 125 OTU_6067 0.001894942 100.00000 other_bacteria
## 126 OTU_6083 0.003165955 86.04651 Synechococcus
## 127 OTU_6102 0.001303175 93.02326 other_bacteria
## 128 OTU_6155 0.001512866 100.00000 other_bacteria
## 129 OTU_6164 0.002118784 93.02326 other_bacteria
## 130 OTU_6193 0.001176459 100.00000 other_bacteria
## 131 OTU_635 0.003025732 93.02326 other_bacteria
## 132 OTU_709 0.003061110 93.02326 other_bacteria
## 133 OTU_7407 0.001341125 90.69767 other_bacteria
## 134 OTU_7455 0.001168097 100.00000 other_bacteria
## 135 OTU_759 0.001018226 86.04651 Synechococcus
## 136 OTU_761 0.008529169 93.02326 Prochlorococcus
## 137 OTU_790 0.001005361 100.00000 other_bacteria
## 138 OTU_7987 0.001436966 100.00000 other_bacteria
## 139 OTU_8011 0.003102276 83.72093 other_bacteria
## 140 OTU_8146 0.001164238 100.00000 other_bacteria
## 141 OTU_8153 0.001957335 93.02326 other_bacteria
## 142 OTU_8163 0.002039668 100.00000 other_bacteria
## 143 OTU_8367 0.001419598 100.00000 other_bacteria
## 144 OTU_8377 0.001168741 100.00000 other_bacteria
## 145 OTU_8436 0.001075473 100.00000 other_bacteria
## 146 OTU_8458 0.001327617 100.00000 other_bacteria
## 147 OTU_9255 0.001645371 86.04651 Synechococcus
## 148 OTU_9267 0.001112780 83.72093 Synechococcus
## 149 OTU_934 0.001139795 100.00000 other_bacteria
## [1] 149 5
Proportion of regionally abundant OTUs (%):
## [1] 0.7539341
Cosmopolitan OTUs (relative abundance over 0.1% and occurence in more than 80% of samples):
otu_tb16_ss1154_cosmop_sorted_prov<-cbind(otu_names=row.names(otu_tb16_ss36155_cosmop_sorted),otu_tb16_ss36155_cosmop_sorted)
## otu_names mean_rabund perc_occur class_B
## 1 OTU_1329 0.002241640 93.02326 other_bacteria
## 2 OTU_1676 0.002044814 88.37209 Prochlorococcus
## 3 OTU_1738 0.001929676 93.02326 other_bacteria
## 4 OTU_1754 0.011149026 93.02326 Prochlorococcus
## 5 OTU_1942 0.006453480 93.02326 Prochlorococcus
## 6 OTU_223 0.001027231 93.02326 other_bacteria
## 7 OTU_23 0.001443398 93.02326 other_bacteria
## 8 OTU_2428 0.007645377 93.02326 other_bacteria
## 9 OTU_2466 0.006429681 93.02326 Prochlorococcus
## 10 OTU_2789 0.001014366 100.00000 other_bacteria
## 11 OTU_27900 0.002379934 100.00000 Prymnesiophyceae
## 12 OTU_27901 0.003573117 81.39535 Prymnesiophyceae
## 13 OTU_27934 0.001615139 95.34884 Prymnesiophyceae
## 14 OTU_27938 0.001353989 93.02326 Pelagophyceae
## 15 OTU_2866 0.003430321 90.69767 other_bacteria
## 16 OTU_2877 0.002066683 95.34884 other_bacteria
## 17 OTU_2893 0.008587059 93.02326 other_bacteria
## 18 OTU_29 0.002464840 100.00000 other_bacteria
## 19 OTU_2980 0.001006648 100.00000 other_bacteria
## 20 OTU_3004 0.002071829 93.02326 other_bacteria
## 21 OTU_3023 0.001958621 93.02326 other_bacteria
## 22 OTU_3162 0.002286023 95.34884 other_bacteria
## 23 OTU_3172 0.002888082 88.37209 other_bacteria
## 24 OTU_3173 0.001013080 83.72093 other_bacteria
## 25 OTU_3187 0.003630364 100.00000 other_bacteria
## 26 OTU_3197 0.001815825 81.39535 other_bacteria
## 27 OTU_3198 0.001436966 93.02326 other_bacteria
## 28 OTU_3207 0.001030447 90.69767 other_bacteria
## 29 OTU_3214 0.001204761 93.02326 other_bacteria
## 30 OTU_3240 0.001783021 93.02326 other_bacteria
## 31 OTU_3271 0.005586413 90.69767 Prochlorococcus
## 32 OTU_3308 0.002082121 97.67442 Synechococcus
## 33 OTU_3311 0.002843056 90.69767 Prochlorococcus
## 34 OTU_3326 0.001315396 90.69767 other_bacteria
## 35 OTU_333 0.001588123 100.00000 other_bacteria
## 36 OTU_3347 0.001297386 97.67442 other_bacteria
## 37 OTU_3680 0.001653732 93.02326 other_bacteria
## 38 OTU_3731 0.001496142 100.00000 other_bacteria
## 39 OTU_3736 0.007181611 93.02326 Prochlorococcus
## 40 OTU_3789 0.001064538 100.00000 other_bacteria
## 41 OTU_3803 0.001385507 100.00000 other_bacteria
## 42 OTU_3810 0.001757292 93.02326 other_bacteria
## 43 OTU_4036 0.001322471 97.67442 other_bacteria
## 44 OTU_4048 0.002347129 100.00000 other_bacteria
## 45 OTU_4083 0.001072257 95.34884 other_bacteria
## 46 OTU_4366 0.001663381 100.00000 other_bacteria
## 47 OTU_4560 0.001658878 100.00000 other_bacteria
## 48 OTU_4664 0.001741211 100.00000 other_bacteria
## 49 OTU_4926 0.001751503 93.02326 other_bacteria
## 50 OTU_4991 0.001551460 95.34884 other_bacteria
## 51 OTU_520 0.001058749 93.02326 other_bacteria
## 52 OTU_5211 0.001081262 95.34884 other_bacteria
## 53 OTU_5222 0.001090910 93.02326 other_bacteria
## 54 OTU_5266 0.001177102 93.02326 other_bacteria
## 55 OTU_5268 0.001193826 100.00000 other_bacteria
## 56 OTU_5288 0.001213123 90.69767 Prochlorococcus
## 57 OTU_5301 0.001794599 95.34884 other_bacteria
## 58 OTU_5327 0.001590696 93.02326 other_bacteria
## 59 OTU_5330 0.004964414 93.02326 Prochlorococcus
## 60 OTU_5355 0.001160379 100.00000 other_bacteria
## 61 OTU_5398 0.005423033 90.69767 Prochlorococcus
## 62 OTU_5403 0.002407593 90.69767 other_bacteria
## 63 OTU_5414 0.001204118 100.00000 other_bacteria
## 64 OTU_542 0.001498072 93.02326 other_bacteria
## 65 OTU_5428 0.001070970 100.00000 other_bacteria
## 66 OTU_5432 0.002228776 93.02326 other_bacteria
## 67 OTU_5441 0.002136152 93.02326 other_bacteria
## 68 OTU_5445 0.001281948 100.00000 other_bacteria
## 69 OTU_5447 0.002478991 100.00000 other_bacteria
## 70 OTU_5449 0.001256219 93.02326 other_bacteria
## 71 OTU_5491 0.001247857 100.00000 other_bacteria
## 72 OTU_5493 0.001173886 95.34884 other_bacteria
## 73 OTU_5499 0.001057463 93.02326 other_bacteria
## 74 OTU_5500 0.001371357 100.00000 other_bacteria
## 75 OTU_5511 0.001293526 93.02326 other_bacteria
## 76 OTU_5523 0.001213123 100.00000 other_bacteria
## 77 OTU_5564 0.001316682 93.02326 other_bacteria
## 78 OTU_5586 0.001207334 95.34884 other_bacteria
## 79 OTU_5607 0.001467197 93.02326 other_bacteria
## 80 OTU_5624 0.001290953 95.34884 other_bacteria
## 81 OTU_5633 0.002238424 95.34884 other_bacteria
## 82 OTU_5635 0.002038381 100.00000 other_bacteria
## 83 OTU_5651 0.001368140 93.02326 other_bacteria
## 84 OTU_5653 0.001828047 93.02326 other_bacteria
## 85 OTU_5662 0.001409950 90.69767 other_bacteria
## 86 OTU_5665 0.001334693 93.02326 other_bacteria
## 87 OTU_5667 0.001355276 93.02326 other_bacteria
## 88 OTU_5691 0.001470413 93.02326 other_bacteria
## 89 OTU_5727 0.002715698 93.02326 other_bacteria
## 90 OTU_5741 0.001373929 100.00000 other_bacteria
## 91 OTU_5742 0.001326974 93.02326 other_bacteria
## 92 OTU_5752 0.001840911 90.69767 Prochlorococcus
## 93 OTU_5772 0.001898801 100.00000 other_bacteria
## 94 OTU_5786 0.001188681 83.72093 Synechococcus
## 95 OTU_5800 0.001009864 93.02326 other_bacteria
## 96 OTU_5804 0.001425387 93.02326 other_bacteria
## 97 OTU_5815 0.001025301 100.00000 other_bacteria
## 98 OTU_5817 0.001042025 100.00000 other_bacteria
## 99 OTU_5821 0.001091553 93.02326 other_bacteria
## 100 OTU_5823 0.001253003 93.02326 other_bacteria
## 101 OTU_5879 0.001743784 93.02326 other_bacteria
## 102 OTU_5880 0.001172600 90.69767 other_bacteria
## 103 OTU_5911 0.001307034 100.00000 other_bacteria
## 104 OTU_5925 0.001328260 86.04651 other_bacteria
## 105 OTU_5942 0.001259435 100.00000 other_bacteria
## 106 OTU_5961 0.001958621 100.00000 other_bacteria
## 107 OTU_5968 0.001397086 100.00000 other_bacteria
## 108 OTU_5976 0.001164881 93.02326 other_bacteria
## 109 OTU_5978 0.001993355 100.00000 other_bacteria
## 110 OTU_5982 0.001111493 93.02326 other_bacteria
## 111 OTU_5985 0.001362351 100.00000 other_bacteria
## 112 OTU_5988 0.001011150 93.02326 other_bacteria
## 113 OTU_6047 0.001099915 100.00000 other_bacteria
## 114 OTU_6067 0.001894942 100.00000 other_bacteria
## 115 OTU_6083 0.003165955 86.04651 Synechococcus
## 116 OTU_6102 0.001303175 93.02326 other_bacteria
## 117 OTU_6155 0.001512866 100.00000 other_bacteria
## 118 OTU_6164 0.002118784 93.02326 other_bacteria
## 119 OTU_6193 0.001176459 100.00000 other_bacteria
## 120 OTU_635 0.003025732 93.02326 other_bacteria
## 121 OTU_709 0.003061110 93.02326 other_bacteria
## 122 OTU_7407 0.001341125 90.69767 other_bacteria
## 123 OTU_7455 0.001168097 100.00000 other_bacteria
## 124 OTU_759 0.001018226 86.04651 Synechococcus
## 125 OTU_761 0.008529169 93.02326 Prochlorococcus
## 126 OTU_790 0.001005361 100.00000 other_bacteria
## 127 OTU_7987 0.001436966 100.00000 other_bacteria
## 128 OTU_8011 0.003102276 83.72093 other_bacteria
## 129 OTU_8146 0.001164238 100.00000 other_bacteria
## 130 OTU_8153 0.001957335 93.02326 other_bacteria
## 131 OTU_8163 0.002039668 100.00000 other_bacteria
## 132 OTU_8367 0.001419598 100.00000 other_bacteria
## 133 OTU_8377 0.001168741 100.00000 other_bacteria
## 134 OTU_8436 0.001075473 100.00000 other_bacteria
## 135 OTU_8458 0.001327617 100.00000 other_bacteria
## 136 OTU_9255 0.001645371 86.04651 Synechococcus
## 137 OTU_9267 0.001112780 83.72093 Synechococcus
## 138 OTU_934 0.001139795 100.00000 other_bacteria
## [1] 138 5
Number and proportion (%) of cosmopolitan OTUs:
## [1] 138
## [1] 0.6982746
Number and proportion (%) of rare OTUs:
## [1] 13963
## [1] 70.65223
Let’s add the taxonomic classification by merging “tb16_tax_occur_ss36155_no_cero” with “tb16_tax”:
tb16_bacteria <- tb16_ss36155_tax_sorted[which(tb16_ss36155_tax_sorted$class_A == "other_bacteria" | tb16_ss36155_tax_sorted$class_A == "Cyanobacteria" ),]
tb16_protists <- tb16_ss36155_tax_sorted[which(tb16_ss36155_tax_sorted$class_A != "other_bacteria" & tb16_ss36155_tax_sorted$class_A != "Cyanobacteria"),]
dim(tb16_protists)
## [1] 318 47
dim(tb16_bacteria)
## [1] 19445 47
#tb16_protists[1:5,43:47]
#tb16_bacteria[1:5,43:47]
#create a table per group and count in how many samples they occur.
Dinophyceae_tb <- tb16_protists[which(tb16_protists$class_A == "Dinophyceae"),]
Dinophyceae_tb[1:5,1:5]
## MP0311 MP1517 MP1672 MP2821 TARA_102_SUR_0d2_3
## OTU_27905 0 0 0 0 0
## OTU_27950 0 0 0 0 0
## OTU_28007 1 0 0 0 0
## OTU_28206 0 0 0 0 0
## OTU_28215 0 0 0 0 0
Dinophyceae_tb_occur <- Dinophyceae_tb[,1:43]
Dinophyceae_tb_occur[1:5,1:5]
## MP0311 MP1517 MP1672 MP2821 TARA_102_SUR_0d2_3
## OTU_27905 0 0 0 0 0
## OTU_27950 0 0 0 0 0
## OTU_28007 1 0 0 0 0
## OTU_28206 0 0 0 0 0
## OTU_28215 0 0 0 0 0
dim(Dinophyceae_tb_occur)
## [1] 5 43
length(Dinophyceae_tb_occur[,colSums(Dinophyceae_tb_occur) > 0])
## [1] 7
#Dinophyceae_tb_samples <- Dinophyceae_tb_occur[,colSums(Dinophyceae_tb_occur) > 0]
#length(Dinophyceae_tb_samples[which(colSums(Dinophyceae_tb_occur) != 0)])
Prasinophyceae_tb <- tb16_protists[which(tb16_protists$class_A == "other_Prasinophyceae"),]
Prasinophyceae_tb_occur <- Prasinophyceae_tb[,1:43]
length(Prasinophyceae_tb_occur[,colSums(Prasinophyceae_tb_occur) > 0])
## [1] 39
Chrysophyceae_tb <- tb16_protists[which(tb16_protists$class_A == "Chrysophyceae"),]
Chrysophyceae_tb_occur <- Chrysophyceae_tb[,1:43]
length(Chrysophyceae_tb_occur[,colSums(Chrysophyceae_tb_occur) > 0])
## [1] 0
Pelagophyceae_tb <- tb16_protists[which(tb16_protists$class_A == "Pelagophyceae"),]
Pelagophyceae_tb_occur <- Pelagophyceae_tb[,1:43]
length(Pelagophyceae_tb_occur[,colSums(Pelagophyceae_tb_occur) > 0])
## [1] 43
Dictyochophyceae_tb <- tb16_protists[which(tb16_protists$class_A == "Dictyochophyceae"),]
Dictyochophyceae_tb_occur <- Dictyochophyceae_tb[,1:43]
length(Dictyochophyceae_tb_occur[,colSums(Dictyochophyceae_tb_occur) > 0])
## [1] 42
Cryptomonadales_tb <- tb16_protists[which(tb16_protists$class_A == "Cryptophyceae"),]
Cryptomonadales_tb_occur <- Cryptomonadales_tb[,1:43]
length(Cryptomonadales_tb_occur[,colSums(Cryptomonadales_tb_occur) > 0])
## [1] 29
Bacillariophyta_tb <- tb16_protists[which(tb16_protists$class_A == "Bacillariophyceae"),]
Bacillariophyta_tb_occur <- Bacillariophyta_tb[,1:43]
length(Bacillariophyta_tb_occur[,colSums(Bacillariophyta_tb_occur) > 0])
## [1] 35
Chlorarachniophyta_tb <- tb16_protists[which(tb16_protists$class_A == "Chlorarachniophyceae"),]
Chlorarachniophyta_tb_occur <- Chlorarachniophyta_tb[,1:43]
length(Chlorarachniophyta_tb_occur[,colSums(Chlorarachniophyta_tb_occur) > 0])
## [1] 5
Bolidophyceae_tb <- tb16_protists[which(tb16_protists$class_A == "Bolidophyceae"),]
Bolidophyceae_tb_occur <- Bolidophyceae_tb[,1:43]
length(Bolidophyceae_tb_occur[,colSums(Bolidophyceae_tb_occur) > 0])
## [1] 29
Pinguiochysidales_tb <- tb16_protists[which(tb16_protists$class_A == "Pinguiophyceae"),]
Pinguiochysidales_tb_occur <- Pinguiochysidales_tb[,1:43]
length(Pinguiochysidales_tb_occur[,colSums(Pinguiochysidales_tb_occur) > 0])
## [1] 2
Prymnesiophyceae_tb <- tb16_protists[which(tb16_protists$class_A == "Prymnesiophyceae"),]
Prymnesiophyceae_tb_occur <- Prymnesiophyceae_tb[,1:43]
length(Prymnesiophyceae_tb_occur[,colSums(Prymnesiophyceae_tb_occur) > 0])
## [1] 43
Mamiellophyceae_tb <- tb16_protists[which(tb16_protists$class_A == "Mamiellophyceae"),]
Mamiellophyceae_tb_occur <- Mamiellophyceae_tb[,1:43]
length(Mamiellophyceae_tb_occur[,colSums(Mamiellophyceae_tb_occur) > 0])
## [1] 30
Eustigmatales_tb <- tb16_protists[which(tb16_protists$class_A == "Eustigmatophyceae"),]
Eustigmatales_tb_occur <- Eustigmatales_tb[,1:43]
length(Eustigmatales_tb_occur[,colSums(Eustigmatales_tb_occur) > 0])
## [1] 7
Chlorophyceae_tb <- tb16_protists[which(tb16_protists$class_A == "Chlorophyceae"),]
Chlorophyceae_tb_occur <- Chlorophyceae_tb[,1:43]
length(Chlorophyceae_tb_occur[,colSums(Chlorophyceae_tb_occur) > 0])
## [1] 1
Ulvophyceae_tb <- tb16_protists[which(tb16_protists$class_A == "Ulvophyceae"),]
Ulvophyceae_tb_occur <- Ulvophyceae_tb[,1:43]
length(Ulvophyceae_tb_occur[,colSums(Ulvophyceae_tb_occur) > 0])
## [1] 0
Raphydophyceae_tb <- tb16_protists[which(tb16_protists$class_A == "Raphydophyceae"),]
Raphydophyceae_tb_occur <- Raphydophyceae_tb[,1:43]
length(Raphydophyceae_tb_occur[,colSums(Raphydophyceae_tb_occur) > 0])
## [1] 0
Trebouxiophyceae_tb <- tb16_protists[which(tb16_protists$class_A == "Trebouxiophyceae"),]
Trebouxiophyceae_tb_occur <- Trebouxiophyceae_tb[,1:43]
length(Trebouxiophyceae_tb_occur[,colSums(Trebouxiophyceae_tb_occur) > 0])
## [1] 1
Phaeophyceae_tb <- tb16_protists[which(tb16_protists$class_A == "Phaeophyceae"),]
Phaeophyceae_tb_occur <- Phaeophyceae_tb[,1:43]
length(Phaeophyceae_tb_occur[,colSums(Phaeophyceae_tb_occur) > 0])
## [1] 2
Phaeothamniophyceae_tb <- tb16_protists[which(tb16_protists$class_A == "Phaeothamniophyceae"),]
Phaeothamniophyceae_tb_occur <- Phaeothamniophyceae_tb[,1:43]
length(Phaeothamniophyceae_tb_occur[,colSums(Phaeothamniophyceae_tb_occur) > 0])
## [1] 0
Xanthophyceae_tb <- tb16_protists[which(tb16_protists$class_A == "Xanthophyceae"),]
Xanthophyceae_tb_occur <- Xanthophyceae_tb[,1:43]
length(Xanthophyceae_tb_occur[,colSums(Xanthophyceae_tb_occur) > 0])
## [1] 0
Chlorodendrophyceae_tb <- tb16_protists[which(tb16_protists$class_A == "Chlorodendrophyceae"),]
Chlorodendrophyceae_tb_occur <- Chlorodendrophyceae_tb[,1:43]
length(Chlorodendrophyceae_tb_occur[,colSums(Chlorodendrophyceae_tb_occur) > 0])
## [1] 1
IncertaeSedis_Archaeplastida_tb <- tb16_protists[which(tb16_protists$class_A == "IncertaeSedis_Archaeplastida"),]
IncertaeSedis_Archaeplastida_tb_occur <- IncertaeSedis_Archaeplastida_tb[,1:43]
length(IncertaeSedis_Archaeplastida_tb_occur[,colSums(IncertaeSedis_Archaeplastida_tb_occur) > 0])
## [1] 0
Nephroselmidophyceae_tb <- tb16_protists[which(tb16_protists$class_A == "Nephroselmidophyceae"),]
Nephroselmidophyceae_tb_occur <- Nephroselmidophyceae_tb[,1:43]
length(Nephroselmidophyceae_tb_occur[,colSums(Nephroselmidophyceae_tb_occur) > 0])
## [1] 0
Pavlovophyceae_tb <- tb16_protists[which(tb16_protists$class_A == "Pavlovophyceae"),]
Pavlovophyceae_tb_occur <- Pavlovophyceae_tb[,1:43]
length(Pavlovophyceae_tb_occur[,colSums(Pavlovophyceae_tb_occur) > 0])
## [1] 2
Rhodophyceae_tb <- tb16_protists[which(tb16_protists$class_A == "Rhodophyceae"),]
Rhodophyceae_tb_occur <- Rhodophyceae_tb[,1:43]
length(Rhodophyceae_tb_occur[,colSums(Rhodophyceae_tb_occur) > 0])
## [1] 0
Rappemonads_tb <- tb16_protists[which(tb16_protists$class_A == "Rappemonads"),]
Rappemonads_tb_occur <- Rappemonads_tb[,1:43]
length(Rappemonads_tb_occur[,colSums(Rappemonads_tb_occur) > 0])
## [1] 25
MOCH_1_tb <- tb16_protists[which(tb16_protists$class_A == "MOCH-1"),]
MOCH_1_tb_occur <- MOCH_1_tb[,1:43]
length(MOCH_1_tb_occur[,colSums(MOCH_1_tb_occur) > 0])
## [1] 0
MOCH_2_tb <- tb16_protists[which(tb16_protists$class_A == "MOCH-2"),]
MOCH_2_tb_occur <- MOCH_2_tb[,1:43]
length(MOCH_2_tb_occur[,colSums(MOCH_2_tb_occur) > 0])
## [1] 0
MOCH_5_tb <- tb16_protists[which(tb16_protists$class_A == "MOCH-5"),]
MOCH_5_tb_occur <- MOCH_5_tb[,1:43]
length(MOCH_5_tb_occur[,colSums(MOCH_5_tb_occur) > 0])
## [1] 0
Prasinophyceae_clade_VII_tb <- tb16_protists[which(tb16_protists$class_A == "Prasinophyceae_clade-VII"),]
Prasinophyceae_clade_VII_tb_occur <- Prasinophyceae_clade_VII_tb[,1:43]
length(Prasinophyceae_clade_VII_tb_occur[,colSums(Prasinophyceae_clade_VII_tb_occur) > 0])
## [1] 0
Prasinophyceae_clade_IX_tb <- tb16_protists[which(tb16_protists$class_A == "Prasinophyceae_clade-IX"),]
Prasinophyceae_clade_IX_tb_occur <- Prasinophyceae_clade_IX_tb[,1:43]
length(Prasinophyceae_clade_IX_tb_occur[,colSums(Prasinophyceae_clade_IX_tb_occur) > 0])
## [1] 0
Pyramimonadaceae_tb <- tb16_protists[which(tb16_protists$class_A == "Pyramimonadaceae"),]
Pyramimonadaceae_tb_occur <- Pyramimonadaceae_tb[,1:43]
length(Pyramimonadaceae_tb_occur[,colSums(Pyramimonadaceae_tb_occur) > 0])
## [1] 6
other_plastids_tb <- tb16_protists[which(tb16_protists$class_A == "other_plastids"),]
other_plastids_tb_occur <- other_plastids_tb[,1:43]
length(other_plastids_tb_occur[,colSums(other_plastids_tb_occur) > 0])
## [1] 21
#create a table per group and count in how many samples they occur.
heterotrophic_bacteria_tb <- tb16_bacteria[which(tb16_bacteria$class_A == "other_bacteria"),]
heterotrophic_bacteria_tb_occur <- heterotrophic_bacteria_tb[,1:43]
length(heterotrophic_bacteria_tb_occur[,colSums(heterotrophic_bacteria_tb_occur) > 0])
Cyanobacteria_tb <- tb16_bacteria[which(tb16_bacteria$class_A == "Cyanobacteria"),]
Cyanobacteria_tb_occur <- Cyanobacteria_tb[,1:43]
length(Cyanobacteria_tb_occur[,colSums(Cyanobacteria_tb_occur) > 0])
## reads_per_class OTUs_per_class
## Bacillariophyceae 725 136
## Bolidophyceae 158 5
## Chlorarachniophyceae 12 4
## Chlorodendrophyceae 1 1
## Chlorophyceae 1 1
## Cryptophyceae 1801 26
## Dictyochophyceae 1219 7
## Dinophyceae 14 5
## Eustigmatophyceae 15 2
## Mamiellophyceae 15689 14
## Pavlovophyceae 2 1
## Pelagophyceae 5578 10
## Phaeophyceae 2 2
## Pinguiophyceae 2 1
## Prymnesiophyceae 25911 50
## Pyramimonadaceae 79 5
## Rappemonads 121 4
## Trebouxiophyceae 1 1
## other_Prasinophyceae 1183 21
## other_plastids 53 22
## reads_per_class OTUs_per_class samples_per_class
## Prymnesiophyceae 25911 50 43
## Mamiellophyceae 15689 14 30
## Pelagophyceae 5578 10 43
## Cryptophyceae 1801 26 29
## Dictyochophyceae 1219 7 42
## other_Prasinophyceae 1183 21 39
## Bacillariophyceae 725 136 35
## Bolidophyceae 158 5 29
## Rappemonads 121 4 25
## Pyramimonadaceae 79 5 6
## other_plastids 53 22 21
## Eustigmatophyceae 15 2 7
## Dinophyceae 14 5 7
## Chlorarachniophyceae 12 4 5
## Phaeophyceae 2 2 2
## Pavlovophyceae 2 1 2
## Pinguiophyceae 2 1 2
## Chlorodendrophyceae 1 1 1
## Chlorophyceae 1 1 1
## Trebouxiophyceae 1 1 1
## reads_per_class OTUs_per_class samples_per_class
## Prymnesiophyceae 25911 50 43
## Mamiellophyceae 15689 14 30
## Pelagophyceae 5578 10 43
## Cryptophyceae 1801 26 29
## Dictyochophyceae 1219 7 42
## other_Prasinophyceae 1183 21 39
## Bacillariophyceae 725 136 35
## Bolidophyceae 158 5 29
## Rappemonads 121 4 25
## Pyramimonadaceae 79 5 6
## Eustigmatophyceae 15 2 7
## Dinophyceae 14 5 7
## Chlorarachniophyceae 12 4 5
## Phaeophyceae 2 2 2
## Pavlovophyceae 2 1 2
## Pinguiophyceae 2 1 2
## Chlorodendrophyceae 1 1 1
## Chlorophyceae 1 1 1
## Trebouxiophyceae 1 1 1
## reads_per_class OTUs_per_class
## Cyanobacteria 171416 344
## other_bacteria 1330682 19101
## NA NA NA
## NA.1 NA NA
## NA.2 NA NA
## reads_per_class OTUs_per_class samples_per_class
## other_bacteria 1330682 19101 43
## Cyanobacteria 171416 344 43
cyano_tb <- tb16_bacteria[which(tb16_bacteria$class_A != "other_bacteria"),]
class_summary_reads_per_class_cyano<-aggregate(rowSums(cyano_tb[1:43]), list(cyano_tb$class_B), sum)
# count the different groups
class_summary_otus_per_class_cyano<-aggregate(rowSums(cyano_tb[1:43]), list(cyano_tb$class_B), length)
## READS PER CLASS ##
attach(class_summary_reads_per_class_cyano)
class_summary_reads_per_class_cyano_order<-class_summary_reads_per_class_cyano[order(-x),]
detach(class_summary_reads_per_class_cyano)
class_summary_reads_per_class_cyano_order
## Group.1 x
## 1 Prochlorococcus 125651
## 2 Synechococcus 41990
## 3 other_cyanobacteria 3775
#fix column names
row.names(class_summary_reads_per_class_cyano_order)<-class_summary_reads_per_class_cyano_order[,1]
class_summary_reads_per_class_cyano_order<-class_summary_reads_per_class_cyano_order[c(-1)]
colnames(class_summary_reads_per_class_cyano_order)<-c("reads_per_class")
## OTUs PER CLASS ##
attach(class_summary_otus_per_class_cyano)
class_summary_otus_per_class_cyano_order<-class_summary_otus_per_class_cyano[order(-x),]
detach(class_summary_otus_per_class_cyano)
class_summary_otus_per_class_cyano_order
## Group.1 x
## 3 other_cyanobacteria 143
## 2 Synechococcus 102
## 1 Prochlorococcus 99
row.names(class_summary_otus_per_class_cyano_order)<-class_summary_otus_per_class_cyano_order[,1]
class_summary_otus_per_class_cyano_order<-class_summary_otus_per_class_cyano_order[c(-1)]
colnames(class_summary_otus_per_class_cyano_order)<-c("OTUs_per_class")
# create_table_merging_#OTUs_#reads
cyano_OTUs_reads <- merge(class_summary_reads_per_class_cyano_order, class_summary_otus_per_class_cyano_order, by="row.names")
row.names(cyano_OTUs_reads)<-cyano_OTUs_reads[,1]
cyano_OTUs_reads<-cyano_OTUs_reads[,-1]
colnames(cyano_OTUs_reads)<-c("reads_per_class","OTUs_per_class")
cyano_OTUs_reads[1:3,1:2]
## reads_per_class OTUs_per_class
## Prochlorococcus 125651 99
## Synechococcus 41990 102
## other_cyanobacteria 3775 143
cyano_OTUs_reads<-cyano_OTUs_reads[order(cyano_OTUs_reads$reads_per_class, cyano_OTUs_reads$OTUs_per_class, decreasing = T), c(1,2)]
cyano_OTUs_reads
## reads_per_class OTUs_per_class
## Prochlorococcus 125651 99
## Synechococcus 41990 102
## other_cyanobacteria 3775 143
#compute relative values
cyano_OTUs_reads_rel_abund <- cyano_OTUs_reads
cyano_OTUs_reads_rel_abund$reads_per_class<-(cyano_OTUs_reads_rel_abund$reads_per_class*100)/colSums(cyano_OTUs_reads)[1]
cyano_OTUs_reads_rel_abund$OTUs_per_class<-(cyano_OTUs_reads_rel_abund$OTUs_per_class*100)/colSums(cyano_OTUs_reads)[2]
colSums(cyano_OTUs_reads_rel_abund)
## reads_per_class OTUs_per_class
## 100 100
rownames(cyano_OTUs_reads_rel_abund) = c("Prochlorococcus", "Synechococcus", "Other cyanobacteria")
cyano_OTUs_reads_rel_abund
## reads_per_class OTUs_per_class
## Prochlorococcus 73.301792 28.77907
## Synechococcus 24.495963 29.65116
## Other cyanobacteria 2.202245 41.56977
cyano_OTUs_reads_rel_abund["cyano_group"]<-NA
cyano_OTUs_reads_rel_abund$cyano_group<-c("Prochlorococcus", "Synechococcus", "Other cyanobacteria")
cyano_OTUs_reads_rel_abund2<-read.table("input/cyano_histograms_data.txt", head=TRUE)
cyano_OTUs_reads_rel_abund2
## group value data
## 1 Prochlorococcus 73.479888 % of reads
## 2 Prochlorococcus 28.323700 % of OTUs
## 3 Synechococcus 24.288471 % of reads
## 4 Synechococcus 30.346820 % of OTUs
## 5 other_cyanobacteria 2.231642 % of reads
## 6 other_cyanobacteria 41.329480 % of OTUs
ggplot(cyano_OTUs_reads_rel_abund2, aes(x=group, y=value, fill=data)) +
geom_bar(position="dodge", stat="identity") +
scale_y_continuous(limits=c(-1,100), breaks=c(25,50,75,100)) +
scale_fill_manual(values=c("#000000", "cadetblue3")) +
labs(x="", y="% \n") +
theme(axis.text=element_text(size=14), axis.title=element_text(size=16)) +
theme(legend.title = element_blank(), legend.position=c(0.85,0.85), legend.text=element_text(size=14)) +
theme(axis.title.y = element_text(size = rel(1.5), angle = 0, vjust=0.97))
## reads_per_class OTUs_per_class samples_per_class
## 100.0000 100.0000 911.6279
## reads_per_class OTUs_per_class samples_per_class
## Prymnesiophyceae 1.157103e+01 7.81250 100.000000
## Mamiellophyceae 7.006207e+00 2.18750 69.767442
## Pelagophyceae 2.490957e+00 1.56250 100.000000
## Cryptophyceae 8.042692e-01 4.06250 67.441860
## Dictyochophyceae 5.443665e-01 1.09375 97.674419
## other_Prasinophyceae 5.282901e-01 3.28125 90.697674
## Bacillariophyceae 3.237619e-01 21.25000 81.395349
## Bolidophyceae 7.055776e-02 0.78125 67.441860
## Rappemonads 5.403474e-02 0.62500 58.139535
## Pyramimonadaceae 3.527888e-02 0.78125 13.953488
## Eustigmatophyceae 6.698522e-03 0.31250 16.279070
## Dinophyceae 6.251954e-03 0.78125 16.279070
## Chlorarachniophyceae 5.358817e-03 0.62500 11.627907
## Phaeophyceae 8.931362e-04 0.31250 4.651163
## Pavlovophyceae 8.931362e-04 0.15625 4.651163
## Pinguiophyceae 8.931362e-04 0.15625 4.651163
## Chlorodendrophyceae 4.465681e-04 0.15625 2.325581
## Chlorophyceae 4.465681e-04 0.15625 2.325581
## Trebouxiophyceae 4.465681e-04 0.15625 2.325581
## Cyanobacteria 7.654892e+01 53.75000 100.000000
Reads per class vs. OTUs per class:
Reads per class vs. samples in which they occurr:
Let’s add the taxonomic classification by merging “tb16_tax_occur_min36155_t” with “tb16_tax”:
Selection of phototrophs:
## [1] 0 47
## [1] 318 47
## [1] 19445 47
#create a table per group and count in how many samples they occur.
Dinophyceae_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "Dinophyceae"),]
Dinophyceae_tb[1:5,1:5]
## TARA_102_SUR_0d2_3 TARA_109_SUR_0d2_3 TARA_110_SUR_0d2_3
## OTU_27905 0 0 0
## OTU_27950 0 0 0
## OTU_28007 0 0 0
## OTU_28206 0 1 0
## OTU_28215 0 0 0
## TARA_111_SUR_0d2_3 TARA_112_SUR_0d2_3
## OTU_27905 0 1
## OTU_27950 0 0
## OTU_28007 0 0
## OTU_28206 0 0
## OTU_28215 0 0
Dinophyceae_tb_occur <- Dinophyceae_tb[,1:43]
Dinophyceae_tb_occur[1:5,1:5]
## TARA_102_SUR_0d2_3 TARA_109_SUR_0d2_3 TARA_110_SUR_0d2_3
## OTU_27905 0 0 0
## OTU_27950 0 0 0
## OTU_28007 0 0 0
## OTU_28206 0 1 0
## OTU_28215 0 0 0
## TARA_111_SUR_0d2_3 TARA_112_SUR_0d2_3
## OTU_27905 0 1
## OTU_27950 0 0
## OTU_28007 0 0
## OTU_28206 0 0
## OTU_28215 0 0
dim(Dinophyceae_tb_occur)
## [1] 6 43
length(Dinophyceae_tb_occur[,colSums(Dinophyceae_tb_occur) > 0])
## [1] 11
#Dinophyceae_tb_samples <- Dinophyceae_tb_occur[,colSums(Dinophyceae_tb_occur) > 0]
#length(Dinophyceae_tb_samples[which(colSums(Dinophyceae_tb_occur) != 0)])
Prasinophyceae_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "other_Prasinophyceae"),]
Prasinophyceae_tb_occur <- Prasinophyceae_tb[,1:43]
length(Prasinophyceae_tb_occur[,colSums(Prasinophyceae_tb_occur) > 0])
## [1] 41
Chrysophyceae_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "Chrysophyceae"),]
Chrysophyceae_tb_occur <- Chrysophyceae_tb[,1:43]
length(Chrysophyceae_tb_occur[,colSums(Chrysophyceae_tb_occur) > 0])
## [1] 0
Pelagophyceae_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "Pelagophyceae"),]
Pelagophyceae_tb_occur <- Pelagophyceae_tb[,1:43]
length(Pelagophyceae_tb_occur[,colSums(Pelagophyceae_tb_occur) > 0])
## [1] 43
Dictyochophyceae_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "Dictyochophyceae"),]
Dictyochophyceae_tb_occur <- Dictyochophyceae_tb[,1:43]
length(Dictyochophyceae_tb_occur[,colSums(Dictyochophyceae_tb_occur) > 0])
## [1] 43
Cryptomonadales_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "Cryptophyceae"),]
Cryptomonadales_tb_occur <- Cryptomonadales_tb[,1:43]
length(Cryptomonadales_tb_occur[,colSums(Cryptomonadales_tb_occur) > 0])
## [1] 31
Bacillariophyta_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "Bacillariophyceae"),]
Bacillariophyta_tb_occur <- Bacillariophyta_tb[,1:43]
length(Bacillariophyta_tb_occur[,colSums(Bacillariophyta_tb_occur) > 0])
## [1] 39
Chlorarachniophyta_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "Chlorarachniophyceae"),]
Chlorarachniophyta_tb_occur <- Chlorarachniophyta_tb[,1:43]
length(Chlorarachniophyta_tb_occur[,colSums(Chlorarachniophyta_tb_occur) > 0])
## [1] 8
Bolidophyceae_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "Bolidophyceae"),]
Bolidophyceae_tb_occur <- Bolidophyceae_tb[,1:43]
length(Bolidophyceae_tb_occur[,colSums(Bolidophyceae_tb_occur) > 0])
## [1] 34
Pinguiochysidales_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "Pinguiophyceae"),]
Pinguiochysidales_tb_occur <- Pinguiochysidales_tb[,1:43]
length(Pinguiochysidales_tb_occur[,colSums(Pinguiochysidales_tb_occur) > 0])
## [1] 3
Prymnesiophyceae_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "Prymnesiophyceae"),]
Prymnesiophyceae_tb_occur <- Prymnesiophyceae_tb[,1:43]
length(Prymnesiophyceae_tb_occur[,colSums(Prymnesiophyceae_tb_occur) > 0])
## [1] 43
Mamiellophyceae_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "Mamiellophyceae"),]
Mamiellophyceae_tb_occur <- Mamiellophyceae_tb[,1:43]
length(Mamiellophyceae_tb_occur[,colSums(Mamiellophyceae_tb_occur) > 0])
## [1] 31
Eustigmatales_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "Eustigmatophyceae"),]
Eustigmatales_tb_occur <- Eustigmatales_tb[,1:43]
length(Eustigmatales_tb_occur[,colSums(Eustigmatales_tb_occur) > 0])
## [1] 9
Chlorophyceae_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "Chlorophyceae"),]
Chlorophyceae_tb_occur <- Chlorophyceae_tb[,1:43]
length(Chlorophyceae_tb_occur[,colSums(Chlorophyceae_tb_occur) > 0])
## [1] 2
Ulvophyceae_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "Ulvophyceae"),]
Ulvophyceae_tb_occur <- Ulvophyceae_tb[,1:43]
length(Ulvophyceae_tb_occur[,colSums(Ulvophyceae_tb_occur) > 0])
## [1] 0
Raphydophyceae_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "Raphydophyceae"),]
Raphydophyceae_tb_occur <- Raphydophyceae_tb[,1:43]
length(Raphydophyceae_tb_occur[,colSums(Raphydophyceae_tb_occur) > 0])
## [1] 0
Trebouxiophyceae_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "Trebouxiophyceae"),]
Trebouxiophyceae_tb_occur <- Trebouxiophyceae_tb[,1:43]
length(Trebouxiophyceae_tb_occur[,colSums(Trebouxiophyceae_tb_occur) > 0])
## [1] 1
Phaeophyceae_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "Phaeophyceae"),]
Phaeophyceae_tb_occur <- Phaeophyceae_tb[,1:43]
length(Phaeophyceae_tb_occur[,colSums(Phaeophyceae_tb_occur) > 0])
## [1] 2
Phaeothamniophyceae_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "Phaeothamniophyceae"),]
Phaeothamniophyceae_tb_occur <- Phaeothamniophyceae_tb[,1:43]
length(Phaeothamniophyceae_tb_occur[,colSums(Phaeothamniophyceae_tb_occur) > 0])
## [1] 0
Xanthophyceae_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "Xanthophyceae"),]
Xanthophyceae_tb_occur <- Xanthophyceae_tb[,1:43]
length(Xanthophyceae_tb_occur[,colSums(Xanthophyceae_tb_occur) > 0])
## [1] 0
Chlorodendrophyceae_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "Chlorodendrophyceae"),]
Chlorodendrophyceae_tb_occur <- Chlorodendrophyceae_tb[,1:43]
length(Chlorodendrophyceae_tb_occur[,colSums(Chlorodendrophyceae_tb_occur) > 0])
## [1] 2
IncertaeSedis_Archaeplastida_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "IncertaeSedis_Archaeplastida"),]
IncertaeSedis_Archaeplastida_tb_occur <- IncertaeSedis_Archaeplastida_tb[,1:43]
length(IncertaeSedis_Archaeplastida_tb_occur[,colSums(IncertaeSedis_Archaeplastida_tb_occur) > 0])
## [1] 0
Nephroselmidophyceae_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "Nephroselmidophyceae"),]
Nephroselmidophyceae_tb_occur <- Nephroselmidophyceae_tb[,1:43]
length(Nephroselmidophyceae_tb_occur[,colSums(Nephroselmidophyceae_tb_occur) > 0])
## [1] 0
Pavlovophyceae_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "Pavlovophyceae"),]
Pavlovophyceae_tb_occur <- Pavlovophyceae_tb[,1:43]
length(Pavlovophyceae_tb_occur[,colSums(Pavlovophyceae_tb_occur) > 0])
## [1] 2
Rhodophyceae_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "Rhodophyceae"),]
Rhodophyceae_tb_occur <- Rhodophyceae_tb[,1:43]
length(Rhodophyceae_tb_occur[,colSums(Rhodophyceae_tb_occur) > 0])
## [1] 0
Rappemonads_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "Rappemonads"),]
Rappemonads_tb_occur <- Rappemonads_tb[,1:43]
length(Rappemonads_tb_occur[,colSums(Rappemonads_tb_occur) > 0])
## [1] 37
MOCH_1_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "MOCH-1"),]
MOCH_1_tb_occur <- MOCH_1_tb[,1:43]
length(MOCH_1_tb_occur[,colSums(MOCH_1_tb_occur) > 0])
## [1] 0
MOCH_2_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "MOCH-2"),]
MOCH_2_tb_occur <- MOCH_2_tb[,1:43]
length(MOCH_2_tb_occur[,colSums(MOCH_2_tb_occur) > 0])
## [1] 0
MOCH_5_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "MOCH-5"),]
MOCH_5_tb_occur <- MOCH_5_tb[,1:43]
length(MOCH_5_tb_occur[,colSums(MOCH_5_tb_occur) > 0])
## [1] 0
Prasinophyceae_clade_VII_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "Prasinophyceae_clade-VII"),]
Prasinophyceae_clade_VII_tb_occur <- Prasinophyceae_clade_VII_tb[,1:43]
length(Prasinophyceae_clade_VII_tb_occur[,colSums(Prasinophyceae_clade_VII_tb_occur) > 0])
## [1] 0
Prasinophyceae_clade_IX_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "Prasinophyceae_clade-IX"),]
Prasinophyceae_clade_IX_tb_occur <- Prasinophyceae_clade_IX_tb[,1:43]
length(Prasinophyceae_clade_IX_tb_occur[,colSums(Prasinophyceae_clade_IX_tb_occur) > 0])
## [1] 0
Pyramimonadaceae_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "Pyramimonadaceae"),]
Pyramimonadaceae_tb_occur <- Pyramimonadaceae_tb[,1:43]
length(Pyramimonadaceae_tb_occur[,colSums(Pyramimonadaceae_tb_occur) > 0])
## [1] 11
other_plastids_tb <- tb16_protists_non.norm[which(tb16_protists_non.norm$class_A == "other_plastids"),]
other_plastids_tb_occur <- other_plastids_tb[,1:43]
length(other_plastids_tb_occur[,colSums(other_plastids_tb_occur) > 0])
## [1] 29
## [1] 43
## [1] 43
## reads_per_class OTUs_per_class
## Bacillariophyceae 1777 168
## Bolidophyceae 312 7
## Chlorarachniophyceae 33 5
## Chlorodendrophyceae 2 2
## Chlorophyceae 2 2
## reads_per_class OTUs_per_class samples_per_class
## Prymnesiophyceae 55897 50 43
## Mamiellophyceae 32273 15 31
## Pelagophyceae 11504 10 43
## Cryptophyceae 3872 28 31
## Dictyochophyceae 2954 7 43
## other_Prasinophyceae 2694 26 41
## Bacillariophyceae 1777 168 39
## Bolidophyceae 312 7 34
## Rappemonads 249 4 37
## Pyramimonadaceae 147 7 11
## other_plastids 93 28 29
## Chlorarachniophyceae 33 5 8
## Dinophyceae 25 6 11
## Eustigmatophyceae 25 2 9
## Pinguiophyceae 3 1 3
## Chlorodendrophyceae 2 2 2
## Chlorophyceae 2 2 2
## Phaeophyceae 2 2 2
## Pavlovophyceae 2 1 2
## Trebouxiophyceae 1 1 1
## reads_per_class OTUs_per_class samples_per_class
## Prymnesiophyceae 55897 50 43
## Mamiellophyceae 32273 15 31
## Pelagophyceae 11504 10 43
## Cryptophyceae 3872 28 31
## Dictyochophyceae 2954 7 43
## other_Prasinophyceae 2694 26 41
## Bacillariophyceae 1777 168 39
## Bolidophyceae 312 7 34
## Rappemonads 249 4 37
## Pyramimonadaceae 147 7 11
## Chlorarachniophyceae 33 5 8
## Dinophyceae 25 6 11
## Eustigmatophyceae 25 2 9
## Pinguiophyceae 3 1 3
## Chlorodendrophyceae 2 2 2
## Chlorophyceae 2 2 2
## Phaeophyceae 2 2 2
## Pavlovophyceae 2 1 2
## Trebouxiophyceae 1 1 1
## reads_per_class OTUs_per_class
## Cyanobacteria 400768 364
## other_bacteria 3171424 23955
## NA NA NA
## NA.1 NA NA
## NA.2 NA NA
OTUs per class vs. samples in which they occur - PROTISTS vs. CYANOBACTERIA [PLOT DESCRIPTION]
## reads_per_class OTUs_per_class samples_per_class
## 100.000 100.000 1013.953
## reads_per_class OTUs_per_class samples_per_class
## Prymnesiophyceae 1.090584e+01 7.0621469 100.000000
## Mamiellophyceae 6.296655e+00 2.1186441 72.093023
## Pelagophyceae 2.244499e+00 1.4124294 100.000000
## Cryptophyceae 7.554503e-01 3.9548023 72.093023
## Dictyochophyceae 5.763430e-01 0.9887006 100.000000
## other_Prasinophyceae 5.256155e-01 3.6723164 95.348837
## Bacillariophyceae 3.467033e-01 23.7288136 90.697674
## Bolidophyceae 6.087306e-02 0.9887006 79.069767
## Rappemonads 4.858138e-02 0.5649718 86.046512
## Pyramimonadaceae 2.868058e-02 0.9887006 25.581395
## Chlorarachniophyceae 6.438497e-03 0.7062147 18.604651
## Dinophyceae 4.877649e-03 0.8474576 25.581395
## Eustigmatophyceae 4.877649e-03 0.2824859 20.930233
## Pinguiophyceae 5.853179e-04 0.1412429 6.976744
## Chlorodendrophyceae 3.902119e-04 0.2824859 4.651163
## Chlorophyceae 3.902119e-04 0.2824859 4.651163
## Phaeophyceae 3.902119e-04 0.2824859 4.651163
## Pavlovophyceae 3.902119e-04 0.1412429 4.651163
## Trebouxiophyceae 1.951060e-04 0.1412429 2.325581
## Cyanobacteria 7.819223e+01 51.4124294 100.000000
Reads per class vs. OTUs per class:
Reads per class vs. samples in which they occurr: